1 /* PSPP - computes sample statistics.
2 Copyright (C) 1997-9, 2000 Free Software Foundation, Inc.
3 Written by Ben Pfaff <blp@gnu.org>.
5 This program is free software; you can redistribute it and/or
6 modify it under the terms of the GNU General Public License as
7 published by the Free Software Foundation; either version 2 of the
8 License, or (at your option) any later version.
10 This program is distributed in the hope that it will be useful, but
11 WITHOUT ANY WARRANTY; without even the implied warranty of
12 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
13 General Public License for more details.
15 You should have received a copy of the GNU General Public License
16 along with this program; if not, write to the Free Software
17 Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA
20 /* AIX requires this to be the first thing in the file. */
23 #define alloca __builtin_alloca
31 #ifndef alloca /* predefined by HP cc +Olibcalls */
46 #include "debug-print.h"
49 #include "file-handle.h"
59 /* Utility function. */
61 /* FIXME: Either REPEATING DATA must be the last transformation, or we
62 must multiplex the transformations that follow (i.e., perform them
63 for every case that we produce from a repetition instance).
64 Currently we do neither. We should do one or the other. */
66 /* Describes how to parse one variable. */
69 struct dls_var_spec *next;
70 struct variable *v; /* Associated variable. Used only in
71 parsing. Not safe later. */
72 char name[9]; /* Free-format: Name of variable. */
73 int rec; /* Fixed-format: Record number (1-based). */
74 int fc, lc; /* Fixed-format: Column numbers in record. */
75 struct fmt_spec input; /* Input format of this field. */
76 int fv; /* First value in case. */
77 int type; /* 0=numeric, >0=width of alpha field. */
80 /* Constants for DATA LIST type. */
81 /* Must match table in cmd_data_list(). */
89 /* DATA LIST private data structure. */
93 struct dls_var_spec *spec; /* Variable parsing specifications. */
94 struct file_handle *handle; /* Input file, never NULL. */
95 /* Do not reorder preceding fields. */
97 int type; /* A DLS_* constant. */
98 struct variable *end; /* Variable specified on END subcommand. */
99 int eof; /* End of file encountered. */
100 int nrec; /* Number of records. */
103 /* Holds information on parsing the data file. */
104 static struct data_list_pgm dls;
106 /* Pointer to a pointer to where the first dls_var_spec should go. */
107 static struct dls_var_spec **first;
109 /* Last dls_var_spec in the chain. Used for building the linked-list. */
110 static struct dls_var_spec *next;
112 static int parse_fixed (void);
113 static int parse_free (void);
114 static void dump_fixed_table (void);
115 static void dump_free_table (void);
116 static void destroy_dls (struct trns_header *);
117 static int read_one_case (struct trns_header *, struct ccase *);
119 /* Message title for REPEATING DATA. */
120 #define RPD_ERR "REPEATING DATA: "
125 /* 0=print no table, 1=print table. (TABLE subcommand.) */
128 lex_match_id ("DATA");
129 lex_match_id ("LIST");
131 if (vfm_source != &input_program_source
132 && vfm_source != &file_type_source)
133 discard_variables ();
135 dls.handle = default_handle;
146 if (lex_match_id ("FILE"))
149 dls.handle = fh_parse_file_handle ();
152 if (vfm_source == &file_type_source && dls.handle != default_handle)
154 msg (SE, _("DATA LIST may not use a different file from "
155 "that specified on its surrounding FILE TYPE."));
159 else if (lex_match_id ("RECORDS"))
163 if (!lex_force_int ())
165 dls.nrec = lex_integer ();
169 else if (lex_match_id ("END"))
173 msg (SE, _("The END subcommand may only be specified once."));
178 if (!lex_force_id ())
180 dls.end = find_variable (tokid);
182 dls.end = force_create_variable (&default_dict, tokid, NUMERIC, 0);
185 else if (token == T_ID)
187 /* Must match DLS_* constants. */
188 static const char *id[] = {"FIXED", "FREE", "LIST", "NOTABLE",
193 for (p = id; *p; p++)
194 if (lex_id_match (*p, tokid))
209 msg (SE, _("Only one of FIXED, FREE, or LIST may "
226 default_handle = dls.handle;
229 dls.type = DLS_FIXED;
233 if (dls.type == DLS_FREE)
239 if (dls.type == DLS_FIXED)
254 if (vfm_source != NULL)
256 struct data_list_pgm *new_pgm;
258 dls.h.proc = read_one_case;
259 dls.h.free = destroy_dls;
261 new_pgm = xmalloc (sizeof *new_pgm);
262 memcpy (new_pgm, &dls, sizeof *new_pgm);
263 add_transformation ((struct trns_header *) new_pgm);
266 vfm_source = &data_list_source;
272 append_var_spec (struct dls_var_spec *spec)
275 *first = next = xmalloc (sizeof *spec);
277 next = next->next = xmalloc (sizeof *spec);
279 memcpy (next, spec, sizeof *spec);
283 /* Fixed-format parsing. */
285 /* Used for chaining together fortran-like format specifiers. */
288 struct fmt_list *next;
291 struct fmt_list *down;
294 /* Used as "local" variables among the fixed-format parsing funcs. If
295 it were guaranteed that PSPP were going to be compiled by gcc,
296 I'd make all these functions a single set of nested functions. */
299 char **name; /* Variable names. */
300 int nname; /* Number of names. */
301 int cname; /* dump_fmt_list: index of next name to use. */
303 int recno; /* Index of current record. */
304 int sc; /* 1-based column number of starting column for
305 next field to output. */
307 struct dls_var_spec spec; /* Next specification to output. */
308 int fc, lc; /* First, last column in set of fields specified
311 int level; /* Nesting level in fixed_parse_fortran(). */
315 static int fixed_parse_compatible (void);
316 static struct fmt_list *fixed_parse_fortran (void);
328 while (lex_match ('/'))
331 if (lex_integer_p ())
333 if (lex_integer () < fx.recno)
335 msg (SE, _("The record number specified, %ld, is "
336 "before the previous record, %d. Data "
337 "fields must be listed in order of "
338 "increasing record number."),
339 lex_integer (), fx.recno - 1);
343 fx.recno = lex_integer ();
348 fx.spec.rec = fx.recno;
350 if (!parse_DATA_LIST_vars (&fx.name, &fx.nname, PV_NONE))
355 if (!fixed_parse_compatible ())
358 else if (token == '(')
362 if (!fixed_parse_fortran ())
367 msg (SE, _("SPSS-like or FORTRAN-like format "
368 "specification expected after variable names."));
372 for (i = 0; i < fx.nname; i++)
376 if (dls.nrec && next->rec > dls.nrec)
378 msg (SE, _("Variables are specified on records that "
379 "should not exist according to RECORDS subcommand."));
383 dls.nrec = next->rec;
386 lex_error (_("expecting end of command"));
392 for (i = 0; i < fx.nname; i++)
399 fixed_parse_compatible (void)
404 if (!lex_force_int ())
407 fx.fc = lex_integer ();
410 msg (SE, _("Column positions for fields must be positive."));
415 lex_negative_to_dash ();
418 if (!lex_force_int ())
420 fx.lc = lex_integer ();
423 msg (SE, _("Column positions for fields must be positive."));
426 else if (fx.lc < fx.fc)
428 msg (SE, _("The ending column for a field must be "
429 "greater than the starting column."));
438 fx.spec.input.w = fx.lc - fx.fc + 1;
441 struct fmt_desc *fdp;
447 fx.spec.input.type = parse_format_specifier_name (&cp, 0);
448 if (fx.spec.input.type == -1)
452 msg (SE, _("A format specifier on this line "
453 "has extra characters on the end."));
461 fx.spec.input.type = FMT_F;
463 if (lex_integer_p ())
465 if (lex_integer () < 1)
467 msg (SE, _("The value for number of decimal places "
468 "must be at least 1."));
472 fx.spec.input.d = lex_integer ();
478 fdp = &formats[fx.spec.input.type];
479 if (fdp->n_args < 2 && fx.spec.input.d)
481 msg (SE, _("Input format %s doesn't accept decimal places."),
486 if (fx.spec.input.d > 16)
487 fx.spec.input.d = 16;
489 if (!lex_force_match (')'))
494 fx.spec.input.type = FMT_F;
500 if ((fx.lc - fx.fc + 1) % fx.nname)
502 msg (SE, _("The %d columns %d-%d "
503 "can't be evenly divided into %d fields."),
504 fx.lc - fx.fc + 1, fx.fc, fx.lc, fx.nname);
508 dividend = (fx.lc - fx.fc + 1) / fx.nname;
509 fx.spec.input.w = dividend;
510 if (!check_input_specifier (&fx.spec.input))
513 for (i = 0; i < fx.nname; i++)
518 if (fx.spec.input.type == FMT_A || fx.spec.input.type == FMT_AHEX)
523 v = create_variable (&default_dict, fx.name[i], type, dividend);
526 convert_fmt_ItoO (&fx.spec.input, &v->print);
531 v = find_variable (fx.name[i]);
535 msg (SE, _("%s is a duplicate variable name."), fx.name[i]);
540 msg (SE, _("There is already a variable %s of a "
545 if (type == ALPHA && dividend != v->width)
547 msg (SE, _("There is already a string variable %s of a "
548 "different width."), fx.name[i]);
554 fx.spec.fc = fx.fc + dividend * i;
555 fx.spec.lc = fx.spec.fc + dividend - 1;
557 fx.spec.type = v->type == NUMERIC ? 0 : v->width;
558 append_var_spec (&fx.spec);
563 /* Destroy a format list and, optionally, all its sublists. */
565 destroy_fmt_list (struct fmt_list *f, int recurse)
567 struct fmt_list *next;
572 if (recurse && f->f.type == FMT_DESCEND)
573 destroy_fmt_list (f->down, 1);
578 /* Takes a hierarchically structured fmt_list F as constructed by
579 fixed_parse_fortran(), and flattens it into a linear list of
582 dump_fmt_list (struct fmt_list *f)
586 for (; f; f = f->next)
587 if (f->f.type == FMT_X)
589 else if (f->f.type == FMT_T)
591 else if (f->f.type == FMT_NEWREC)
593 fx.recno += f->count;
597 for (i = 0; i < f->count; i++)
598 if (f->f.type == FMT_DESCEND)
600 if (!dump_fmt_list (f->down))
608 type = (formats[f->f.type].cat & FCAT_STRING) ? ALPHA : NUMERIC;
609 if (fx.cname >= fx.nname)
611 msg (SE, _("The number of format "
612 "specifications exceeds the number of "
613 "variable names given."));
617 fx.spec.v = v = create_variable (&default_dict,
622 msg (SE, _("%s is a duplicate variable name."), fx.name[i]);
626 fx.spec.input = f->f;
627 convert_fmt_ItoO (&fx.spec.input, &v->print);
630 fx.spec.rec = fx.recno;
632 fx.spec.lc = fx.sc + f->f.w - 1;
634 fx.spec.type = v->type == NUMERIC ? 0 : v->width;
635 append_var_spec (&fx.spec);
642 /* Calls itself recursively to parse nested levels of parentheses.
643 Returns to its original caller: NULL, to indicate error; non-NULL,
644 but nothing useful, to indicate success (it returns a free()'d
646 static struct fmt_list *
647 fixed_parse_fortran (void)
649 struct fmt_list *head;
650 struct fmt_list *fl = NULL;
652 lex_get (); /* Skip opening parenthesis. */
656 fl = fl->next = xmalloc (sizeof *fl);
658 head = fl = xmalloc (sizeof *fl);
660 if (lex_integer_p ())
662 fl->count = lex_integer ();
670 fl->f.type = FMT_DESCEND;
672 fl->down = fixed_parse_fortran ();
677 else if (lex_match ('/'))
678 fl->f.type = FMT_NEWREC;
679 else if (!parse_format_specifier (&fl->f, 1)
680 || !check_input_specifier (&fl->f))
692 dump_fmt_list (head);
693 if (fx.cname < fx.nname)
695 msg (SE, _("There aren't enough format specifications "
696 "to match the number of variable names given."));
699 destroy_fmt_list (head, 1);
704 destroy_fmt_list (head, 0);
709 /* Displays a table giving information on fixed-format variable
710 parsing on DATA LIST. */
711 /* FIXME: The `Columns' column should be divided into three columns,
712 one for the starting column, one for the dash, one for the ending
713 column; then right-justify the starting column and left-justify the
716 dump_fixed_table (void)
718 struct dls_var_spec *spec;
721 const char *filename;
724 for (i = 0, spec = *first; spec; spec = spec->next)
726 t = tab_create (4, i + 1, 0);
727 tab_columns (t, TAB_COL_DOWN, 1);
728 tab_headers (t, 0, 0, 1, 0);
729 tab_text (t, 0, 0, TAB_CENTER | TAT_TITLE, _("Variable"));
730 tab_text (t, 1, 0, TAB_CENTER | TAT_TITLE, _("Record"));
731 tab_text (t, 2, 0, TAB_CENTER | TAT_TITLE, _("Columns"));
732 tab_text (t, 3, 0, TAB_CENTER | TAT_TITLE, _("Format"));
733 tab_box (t, TAL_1, TAL_1, TAL_0, TAL_1, 0, 0, 3, i);
734 tab_hline (t, TAL_2, 0, 3, 1);
735 tab_dim (t, tab_natural_dimensions);
737 for (i = 1, spec = *first; spec; spec = spec->next, i++)
739 tab_text (t, 0, i, TAB_LEFT, spec->v->name);
740 tab_text (t, 1, i, TAT_PRINTF, "%d", spec->rec);
741 tab_text (t, 2, i, TAT_PRINTF, "%3d-%3d",
743 tab_text (t, 3, i, TAB_LEFT | TAT_FIX,
744 fmt_to_string (&spec->input));
747 if (*first == dls.spec)
749 filename = fh_handle_name (dls.handle);
750 if (filename == NULL)
752 buf = local_alloc (strlen (filename) + INT_DIGITS + 80);
753 sprintf (buf, (dls.handle != inline_file
754 ? _("Reading %d record%s from file %s.")
755 : _("Reading %d record%s from the command file.")),
756 dls.nrec, dls.nrec != 1 ? "s" : "", filename);
760 buf = local_alloc (strlen (_("Occurrence data specifications.")) + 1);
761 strcpy (buf, _("Occurrence data specifications."));
764 tab_title (t, 0, buf);
766 fh_handle_name (NULL);
770 /* Free-format parsing. */
775 struct dls_var_spec spec;
776 struct fmt_spec in, out;
785 if (!parse_DATA_LIST_vars (&name, &nname, PV_NONE))
789 if (!parse_format_specifier (&in, 0) || !check_input_specifier (&in))
791 if (!lex_force_match (')'))
793 convert_fmt_ItoO (&in, &out);
805 if (in.type == FMT_A || in.type == FMT_AHEX)
809 for (i = 0; i < nname; i++)
813 spec.v = v = create_variable (&default_dict, name[i], type, in.w);
816 msg (SE, _("%s is a duplicate variable name."), name[i]);
820 v->print = v->write = out;
822 strcpy (spec.name, name[i]);
824 spec.type = type == NUMERIC ? 0 : v->width;
825 append_var_spec (&spec);
827 for (i = 0; i < nname; i++)
833 lex_error (_("expecting end of command"));
837 for (i = 0; i < nname; i++)
843 /* Displays a table giving information on free-format variable parsing
846 dump_free_table (void)
852 struct dls_var_spec *spec;
853 for (i = 0, spec = dls.spec; spec; spec = spec->next)
857 t = tab_create (2, i + 1, 0);
858 tab_columns (t, TAB_COL_DOWN, 1);
859 tab_headers (t, 0, 0, 1, 0);
860 tab_text (t, 0, 0, TAB_CENTER | TAT_TITLE, _("Variable"));
861 tab_text (t, 1, 0, TAB_CENTER | TAT_TITLE, _("Format"));
862 tab_box (t, TAL_1, TAL_1, TAL_0, TAL_1, 0, 0, 1, i);
863 tab_hline (t, TAL_2, 0, 1, 1);
864 tab_dim (t, tab_natural_dimensions);
867 struct dls_var_spec *spec;
869 for (i = 1, spec = dls.spec; spec; spec = spec->next, i++)
871 tab_text (t, 0, i, TAB_LEFT, spec->v->name);
872 tab_text (t, 1, i, TAB_LEFT | TAT_FIX, fmt_to_string (&spec->input));
877 const char *filename;
879 filename = fh_handle_name (dls.handle);
880 if (filename == NULL)
883 (dls.handle != inline_file
884 ? _("Reading free-form data from file %s.")
885 : _("Reading free-form data from the command file.")),
890 fh_handle_name (NULL);
893 /* Input procedure. */
895 /* Pointer to relevant parsing data. Static just to avoid passing it
897 static struct data_list_pgm *dlsp;
899 /* Extracts a field from the current position in the current record.
900 Fields can be unquoted or quoted with single- or double-quote
901 characters. *RET_LEN is set to the field length, *RET_CP is set to
902 the field itself. After parsing the field, sets the current
903 position in the record to just past the field. Returns 0 on
904 failure or a 1-based column number indicating the beginning of the
907 cut_field (char **ret_cp, int *ret_len)
912 cp = dfm_get_record (dlsp->handle, &len);
918 /* Skip leading whitespace and commas. */
919 while ((isspace ((unsigned char) *cp) || *cp == ',') && cp < ep)
924 /* Three types of fields: quoted with ', quoted with ", unquoted. */
925 if (*cp == '\'' || *cp == '"')
930 while (cp < ep && *cp != quote)
932 *ret_len = cp - *ret_cp;
936 msg (SW, _("Scope of string exceeds line."));
941 while (cp < ep && !isspace ((unsigned char) *cp) && *cp != ',')
943 *ret_len = cp - *ret_cp;
947 int beginning_column;
949 dfm_set_record (dlsp->handle, *ret_cp);
950 beginning_column = dfm_get_cur_col (dlsp->handle) + 1;
952 dfm_set_record (dlsp->handle, cp);
954 return beginning_column;
958 static int read_from_data_list_fixed (void);
959 static int read_from_data_list_free (void);
960 static int read_from_data_list_list (void);
961 static int do_reading (int flag);
963 /* FLAG==0: reads any number of cases into temp_case and calls
964 write_case() for each one, returns garbage. FLAG!=0: reads one
965 case into temp_case and returns -2 on eof, -1 otherwise.
966 Uses dlsp as the relevant parsing description. */
968 do_reading (int flag)
974 dfm_push (dlsp->handle);
979 func = read_from_data_list_fixed;
982 func = read_from_data_list_free;
985 func = read_from_data_list_list;
997 msg (SE, _("Attempt to read past end of file."));
1006 if (dlsp->end != NULL)
1010 printf ("end of file, setting %s to 1\n", dlsp->end->name);
1011 temp_case->data[dlsp->end->fv].f = 1.0;
1016 printf ("not end of file, setting %s to 0\n", dlsp->end->name);
1017 temp_case->data[dlsp->end->fv].f = 0.0;
1023 while (func () != -2)
1026 debug_printf ((_("abort in write_case()\n")));
1029 fh_close_handle (dlsp->handle);
1031 dfm_pop (dlsp->handle);
1036 /* Reads a case from the data file and parses it according to
1037 fixed-format syntax rules. */
1039 read_from_data_list_fixed (void)
1041 struct dls_var_spec *var_spec = dlsp->spec;
1044 if (!dfm_get_record (dlsp->handle, NULL))
1046 for (i = 1; i <= dlsp->nrec; i++)
1049 char *line = dfm_get_record (dlsp->handle, &len);
1053 /* Note that this can't occur on the first record. */
1054 msg (SW, _("Partial case of %d of %d records discarded."),
1059 for (; var_spec && i == var_spec->rec; var_spec = var_spec->next)
1063 data_in_finite_line (&di, line, len, var_spec->fc, var_spec->lc);
1064 di.v = &temp_case->data[var_spec->fv];
1066 di.f1 = var_spec->fc;
1067 di.format = var_spec->input;
1072 dfm_fwd_record (dlsp->handle);
1078 /* Reads a case from the data file and parses it according to
1079 free-format syntax rules. */
1081 read_from_data_list_free (void)
1083 struct dls_var_spec *var_spec;
1087 for (var_spec = dlsp->spec; var_spec; var_spec = var_spec->next)
1091 /* Cut out a field and read in a new record if necessary. */
1094 column = cut_field (&field, &len);
1098 if (dfm_get_record (dlsp->handle, NULL))
1099 dfm_fwd_record (dlsp->handle);
1100 if (!dfm_get_record (dlsp->handle, NULL))
1102 if (var_spec != dlsp->spec)
1103 msg (SW, _("Partial case discarded. The first variable "
1104 "missing was %s."), var_spec->name);
1114 di.v = &temp_case->data[var_spec->fv];
1117 di.format = var_spec->input;
1124 /* Reads a case from the data file and parses it according to
1125 list-format syntax rules. */
1127 read_from_data_list_list (void)
1129 struct dls_var_spec *var_spec;
1133 if (!dfm_get_record (dlsp->handle, NULL))
1136 for (var_spec = dlsp->spec; var_spec; var_spec = var_spec->next)
1138 /* Cut out a field and check for end-of-line. */
1139 int column = cut_field (&field, &len);
1144 msg (SW, _("Missing value(s) for all variables from %s onward. "
1145 "These will be filled with the system-missing value "
1146 "or blanks, as appropriate."),
1148 for (; var_spec; var_spec = var_spec->next)
1149 if (!var_spec->type)
1150 temp_case->data[var_spec->fv].f = SYSMIS;
1152 memset (temp_case->data[var_spec->fv].s, ' ', var_spec->type);
1161 di.v = &temp_case->data[var_spec->fv];
1164 di.format = var_spec->input;
1169 dfm_fwd_record (dlsp->handle);
1173 /* Destroys DATA LIST transformation or input program PGM. */
1175 destroy_dls (struct trns_header *pgm)
1177 struct data_list_pgm *dls = (struct data_list_pgm *) pgm;
1178 struct dls_var_spec *iter, *next;
1187 fh_close_handle (dls->handle);
1190 /* Note that since this is exclusively an input program, C is
1191 guaranteed to be temp_case. */
1193 read_one_case (struct trns_header *t, struct ccase *c unused)
1195 dlsp = (struct data_list_pgm *) t;
1196 return do_reading (1);
1199 /* Reads all the records from the data file and passes them to
1202 data_list_source_read (void)
1208 /* Destroys the source's internal data. */
1210 data_list_source_destroy_source (void)
1212 destroy_dls ((struct trns_header *) & dls);
1215 struct case_stream data_list_source =
1218 data_list_source_read,
1221 data_list_source_destroy_source,
1226 /* REPEATING DATA. */
1228 /* Represents a number or a variable. */
1229 struct rpd_num_or_var
1231 int num; /* Value, or 0. */
1232 struct variable *var; /* Variable, if number==0. */
1235 /* REPEATING DATA private data structure. */
1236 struct repeating_data_trns
1238 struct trns_header h;
1239 struct dls_var_spec *spec; /* Variable parsing specifications. */
1240 struct file_handle *handle; /* Input file, never NULL. */
1241 /* Do not reorder preceding fields. */
1243 struct rpd_num_or_var starts_beg; /* STARTS=, before the dash. */
1244 struct rpd_num_or_var starts_end; /* STARTS=, after the dash. */
1245 struct rpd_num_or_var occurs; /* OCCURS= subcommand. */
1246 struct rpd_num_or_var length; /* LENGTH= subcommand. */
1247 struct rpd_num_or_var cont_beg; /* CONTINUED=, before the dash. */
1248 struct rpd_num_or_var cont_end; /* CONTINUED=, after the dash. */
1249 int id_beg, id_end; /* ID subcommand, beginning & end columns. */
1250 struct variable *id_var; /* ID subcommand, DATA LIST variable. */
1251 struct fmt_spec id_spec; /* ID subcommand, input format spec. */
1254 /* Information about the transformation being parsed. */
1255 static struct repeating_data_trns rpd;
1257 static int read_one_set_of_repetitions (struct trns_header *, struct ccase *);
1258 static int parse_num_or_var (struct rpd_num_or_var *, const char *);
1259 static int parse_repeating_data (void);
1260 static void find_variable_input_spec (struct variable *v,
1261 struct fmt_spec *spec);
1263 /* Parses the REPEATING DATA command. */
1265 cmd_repeating_data (void)
1267 /* 0=print no table, 1=print table. (TABLE subcommand.) */
1270 /* Bits are set when a particular subcommand has been seen. */
1273 lex_match_id ("REPEATING");
1274 lex_match_id ("DATA");
1276 assert (vfm_source == &input_program_source
1277 || vfm_source == &file_type_source);
1279 rpd.handle = default_handle;
1280 rpd.starts_beg.num = 0;
1281 rpd.starts_beg.var = NULL;
1282 rpd.starts_end = rpd.occurs = rpd.length = rpd.cont_beg
1283 = rpd.cont_end = rpd.starts_beg;
1284 rpd.id_beg = rpd.id_end = 0;
1294 if (lex_match_id ("FILE"))
1297 rpd.handle = fh_parse_file_handle ();
1300 if (rpd.handle != default_handle)
1302 msg (SE, _("REPEATING DATA must use the same file as its "
1303 "corresponding DATA LIST or FILE TYPE."));
1307 else if (lex_match_id ("STARTS"))
1312 msg (SE, _("STARTS subcommand given multiple times."));
1317 if (!parse_num_or_var (&rpd.starts_beg, "STARTS beginning column"))
1320 lex_negative_to_dash ();
1321 if (lex_match ('-'))
1323 if (!parse_num_or_var (&rpd.starts_end, "STARTS ending column"))
1326 /* Otherwise, rpd.starts_end is left uninitialized.
1327 This is okay. We will initialize it later from the
1328 record length of the file. We can't do this now
1329 because we can't be sure that the user has specified
1330 the file handle yet. */
1333 if (rpd.starts_beg.num != 0 && rpd.starts_end.num != 0
1334 && rpd.starts_beg.num > rpd.starts_end.num)
1336 msg (SE, _("STARTS beginning column (%d) exceeds "
1337 "STARTS ending column (%d)."),
1338 rpd.starts_beg.num, rpd.starts_end.num);
1342 else if (lex_match_id ("OCCURS"))
1347 msg (SE, _("OCCURS subcommand given multiple times."));
1352 if (!parse_num_or_var (&rpd.occurs, "OCCURS"))
1355 else if (lex_match_id ("LENGTH"))
1360 msg (SE, _("LENGTH subcommand given multiple times."));
1365 if (!parse_num_or_var (&rpd.length, "LENGTH"))
1368 else if (lex_match_id ("CONTINUED"))
1373 msg (SE, _("CONTINUED subcommand given multiple times."));
1378 if (!lex_match ('/'))
1380 if (!parse_num_or_var (&rpd.cont_beg, "CONTINUED beginning column"))
1383 lex_negative_to_dash ();
1385 && !parse_num_or_var (&rpd.cont_end,
1386 "CONTINUED ending column"))
1389 if (rpd.cont_beg.num != 0 && rpd.cont_end.num != 0
1390 && rpd.cont_beg.num > rpd.cont_end.num)
1392 msg (SE, _("CONTINUED beginning column (%d) exceeds "
1393 "CONTINUED ending column (%d)."),
1394 rpd.cont_beg.num, rpd.cont_end.num);
1399 rpd.cont_beg.num = 1;
1401 else if (lex_match_id ("ID"))
1406 msg (SE, _("ID subcommand given multiple times."));
1411 if (!lex_force_int ())
1413 if (lex_integer () < 1)
1415 msg (SE, _("ID beginning column (%ld) must be positive."),
1419 rpd.id_beg = lex_integer ();
1422 lex_negative_to_dash ();
1424 if (lex_match ('-'))
1426 if (!lex_force_int ())
1428 if (lex_integer () < 1)
1430 msg (SE, _("ID ending column (%ld) must be positive."),
1434 if (lex_integer () < rpd.id_end)
1436 msg (SE, _("ID ending column (%ld) cannot be less than "
1437 "ID beginning column (%d)."),
1438 lex_integer (), rpd.id_beg);
1442 rpd.id_end = lex_integer ();
1445 else rpd.id_end = rpd.id_beg;
1447 if (!lex_force_match ('='))
1449 rpd.id_var = parse_variable ();
1450 if (rpd.id_var == NULL)
1453 find_variable_input_spec (rpd.id_var, &rpd.id_spec);
1455 else if (lex_match_id ("TABLE"))
1457 else if (lex_match_id ("NOTABLE"))
1459 else if (lex_match_id ("DATA"))
1467 if (!lex_force_match ('/'))
1471 /* Comes here when DATA specification encountered. */
1472 if ((seen & (1 | 2)) != (1 | 2))
1474 if ((seen & 1) == 0)
1475 msg (SE, _("Missing required specification STARTS."));
1476 if ((seen & 2) == 0)
1477 msg (SE, _("Missing required specification OCCURS."));
1481 /* Enforce ID restriction. */
1482 if ((seen & 16) && !(seen & 8))
1484 msg (SE, _("ID specified without CONTINUED."));
1488 /* Calculate starts_end, cont_end if necessary. */
1489 if (rpd.starts_end.num == 0 && rpd.starts_end.var == NULL)
1490 rpd.starts_end.num = fh_record_width (rpd.handle);
1491 if (rpd.cont_end.num == 0 && rpd.starts_end.var == NULL)
1492 rpd.cont_end.num = fh_record_width (rpd.handle);
1494 /* Calculate length if possible. */
1495 if ((seen & 4) == 0)
1497 struct dls_var_spec *iter;
1499 for (iter = rpd.spec; iter; iter = iter->next)
1501 if (iter->lc > rpd.length.num)
1502 rpd.length.num = iter->lc;
1504 assert (rpd.length.num != 0);
1508 if (!parse_repeating_data ())
1512 dump_fixed_table ();
1515 struct repeating_data_trns *new_trns;
1517 rpd.h.proc = read_one_set_of_repetitions;
1518 rpd.h.free = destroy_dls;
1520 new_trns = xmalloc (sizeof *new_trns);
1521 memcpy (new_trns, &rpd, sizeof *new_trns);
1522 add_transformation ((struct trns_header *) new_trns);
1525 return lex_end_of_command ();
1528 /* Because of the way that DATA LIST is structured, it's not trivial
1529 to determine what input format is associated with a given variable.
1530 This function finds the input format specification for variable V
1531 and puts it in SPEC. */
1533 find_variable_input_spec (struct variable *v, struct fmt_spec *spec)
1537 for (i = 0; i < n_trns; i++)
1539 struct data_list_pgm *pgm = (struct data_list_pgm *) t_trns[i];
1541 if (pgm->h.proc == read_one_case)
1543 struct dls_var_spec *iter;
1545 for (iter = pgm->spec; iter; iter = iter->next)
1548 *spec = iter->input;
1557 /* Parses a number or a variable name from the syntax file and puts
1558 the results in VALUE. Ensures that the number is at least 1; else
1559 emits an error based on MESSAGE. Returns nonzero only if
1562 parse_num_or_var (struct rpd_num_or_var *value, const char *message)
1567 value->var = parse_variable ();
1568 if (value->var == NULL)
1570 if (value->var->type == ALPHA)
1572 msg (SE, _("String variable not allowed here."));
1576 else if (lex_integer_p ())
1578 value->num = lex_integer ();
1582 msg (SE, _("%s (%d) must be at least 1."), message, value->num);
1588 msg (SE, _("Variable or integer expected for %s."), message);
1594 /* Parses data specifications for repeating data groups. Taken from
1595 parse_fixed(). Returns nonzero only if successful. */
1597 parse_repeating_data (void)
1604 while (token != '.')
1606 fx.spec.rec = fx.recno;
1608 if (!parse_DATA_LIST_vars (&fx.name, &fx.nname, PV_NONE))
1613 if (!fixed_parse_compatible ())
1616 else if (token == '(')
1620 if (!fixed_parse_fortran ())
1625 msg (SE, _("SPSS-like or FORTRAN-like format "
1626 "specification expected after variable names."));
1630 for (i = 0; i < fx.nname; i++)
1636 lex_error (_("expecting end of command"));
1643 for (i = 0; i < fx.nname; i++)
1649 /* Obtains the real value for rpd_num_or_var N in case C and returns
1650 it. The valid range is nonnegative numbers, but numbers outside
1651 this range can be returned and should be handled by the caller as
1654 realize_value (struct rpd_num_or_var *n, struct ccase *c)
1659 assert (n->num == 0);
1662 double v = c->data[n->var->fv].f;
1664 if (v == SYSMIS || v <= INT_MIN || v >= INT_MAX)
1673 /* Parses one record of repeated data and outputs corresponding cases.
1674 Repeating data is present in line LINE having length LEN.
1675 Repeating data begins in column BEG and continues through column
1676 END inclusive (1-based columns); occurrences are offset OFS columns
1677 from each other. C is the case that will be filled in; T is the
1678 REPEATING DATA transformation. The record ID will be verified if
1679 COMPARE_ID is nonzero; if it is zero, then the record ID is
1680 initialized to the ID present in the case (assuming that ID
1681 location was specified by the user). Returns number of occurrences
1682 parsed up to the specified maximum of MAX_OCCURS. */
1684 rpd_parse_record (int beg, int end, int ofs, struct ccase *c,
1685 struct repeating_data_trns *t,
1686 char *line, int len, int compare_id, int max_occurs)
1691 /* Handle record ID values. */
1694 static union value comparator;
1700 data_in_finite_line (&di, line, len, t->id_beg, t->id_end);
1704 di.format = t->id_spec;
1710 if (compare_id == 0)
1712 else if ((t->id_var->type == NUMERIC && comparator.f != v.f)
1713 || (t->id_var->type == ALPHA
1714 && strncmp (comparator.s, v.s, t->id_var->width)))
1719 if (!data_out (comp_str, &t->id_var->print, &comparator))
1721 if (!data_out (v_str, &t->id_var->print, &v))
1724 comp_str[t->id_var->print.w] = v_str[t->id_var->print.w] = 0;
1727 _("Mismatched case ID (%s). Expected value was %s."),
1734 /* Iterate over the set of expected occurrences and record each of
1735 them as a separate case. FIXME: We need to execute any
1736 transformations that follow the current one. */
1740 for (occurrences = 0; occurrences < max_occurs; )
1742 if (cur + ofs > end + 1)
1747 struct dls_var_spec *var_spec = t->spec;
1749 for (; var_spec; var_spec = var_spec->next)
1751 int fc = var_spec->fc - 1 + cur;
1752 int lc = var_spec->lc - 1 + cur;
1754 if (fc > len && !warned && var_spec->input.type != FMT_A)
1759 _("Variable %s startging in column %d extends "
1760 "beyond physical record length of %d."),
1761 var_spec->v->name, fc, len);
1767 data_in_finite_line (&di, line, len, fc, lc);
1768 di.v = &c->data[var_spec->fv];
1771 di.format = var_spec->input;
1789 /* Analogous to read_one_case; reads one set of repetitions of the
1790 elements in the REPEATING DATA structure. Returns -1 on success,
1791 -2 on end of file or on failure. */
1793 read_one_set_of_repetitions (struct trns_header *trns, struct ccase *c)
1795 dfm_push (dlsp->handle);
1798 struct repeating_data_trns *t = (struct repeating_data_trns *) trns;
1800 char *line; /* Current record. */
1801 int len; /* Length of current record. */
1803 int starts_beg; /* Starting column. */
1804 int starts_end; /* Ending column. */
1805 int occurs; /* Number of repetitions. */
1806 int length; /* Length of each occurrence. */
1807 int cont_beg; /* Starting column for continuation lines. */
1808 int cont_end; /* Ending column for continuation lines. */
1810 int occurs_left; /* Number of occurrences remaining. */
1812 int code; /* Return value from rpd_parse_record(). */
1814 int skip_first_record = 0;
1816 /* Read the current record. */
1817 dfm_bkwd_record (dlsp->handle, 1);
1818 line = dfm_get_record (dlsp->handle, &len);
1821 dfm_fwd_record (dlsp->handle);
1823 /* Calculate occurs, length. */
1824 occurs_left = occurs = realize_value (&t->occurs, c);
1827 tmsg (SE, RPD_ERR, _("Invalid value %d for OCCURS."), occurs);
1830 starts_beg = realize_value (&t->starts_beg, c);
1831 if (starts_beg <= 0)
1833 tmsg (SE, RPD_ERR, _("Beginning column for STARTS (%d) must be "
1838 starts_end = realize_value (&t->starts_end, c);
1839 if (starts_end < starts_beg)
1841 tmsg (SE, RPD_ERR, _("Ending column for STARTS (%d) is less than "
1842 "beginning column (%d)."),
1843 starts_end, starts_beg);
1844 skip_first_record = 1;
1846 length = realize_value (&t->length, c);
1849 tmsg (SE, RPD_ERR, _("Invalid value %d for LENGTH."), length);
1851 occurs = occurs_left = 1;
1853 cont_beg = realize_value (&t->cont_beg, c);
1856 tmsg (SE, RPD_ERR, _("Beginning column for CONTINUED (%d) must be "
1861 cont_end = realize_value (&t->cont_end, c);
1862 if (cont_end < cont_beg)
1864 tmsg (SE, RPD_ERR, _("Ending column for CONTINUED (%d) is less than "
1865 "beginning column (%d)."),
1866 cont_end, cont_beg);
1870 /* Parse the first record. */
1871 if (!skip_first_record)
1873 code = rpd_parse_record (starts_beg, starts_end, length, c, t, line,
1874 len, 0, occurs_left);
1878 else if (cont_beg == 0)
1881 /* Make sure, if some occurrences are left, that we have
1882 continuation records. */
1883 occurs_left -= code;
1884 if (occurs_left != 0 && cont_beg == 0)
1887 _("Number of repetitions specified on OCCURS (%d) "
1888 "exceed number of repetitions available in "
1889 "space on STARTS (%d), and CONTINUED not specified."),
1894 /* Go on to additional records. */
1895 while (occurs_left != 0)
1897 assert (occurs_left >= 0);
1899 /* Read in another record. */
1900 line = dfm_get_record (dlsp->handle, &len);
1904 _("Unexpected end of file with %d repetitions "
1905 "remaining out of %d."),
1906 occurs_left, occurs);
1909 dfm_fwd_record (dlsp->handle);
1911 /* Parse this record. */
1912 code = rpd_parse_record (cont_beg, cont_end, length, c, t, line,
1913 len, 1, occurs_left);
1916 occurs_left -= code;
1920 dfm_pop (dlsp->handle);
1922 /* FIXME: This is a kluge until we've implemented multiplexing of