1 /* PSPP - computes sample statistics.
2 Copyright (C) 1997-9, 2000 Free Software Foundation, Inc.
3 Written by Ben Pfaff <blp@gnu.org>.
5 This program is free software; you can redistribute it and/or
6 modify it under the terms of the GNU General Public License as
7 published by the Free Software Foundation; either version 2 of the
8 License, or (at your option) any later version.
10 This program is distributed in the hope that it will be useful, but
11 WITHOUT ANY WARRANTY; without even the implied warranty of
12 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
13 General Public License for more details.
15 You should have received a copy of the GNU General Public License
16 along with this program; if not, write to the Free Software
17 Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA
20 /* AIX requires this to be the first thing in the file. */
23 #define alloca __builtin_alloca
31 #ifndef alloca /* predefined by HP cc +Olibcalls */
46 #include "debug-print.h"
49 #include "file-handle.h"
59 /* Utility function. */
61 /* FIXME: Either REPEATING DATA must be the last transformation, or we
62 must multiplex the transformations that follow (i.e., perform them
63 for every case that we produce from a repetition instance).
64 Currently we do neither. We should do one or the other. */
66 /* Describes how to parse one variable. */
69 struct dls_var_spec *next;
70 struct variable *v; /* Associated variable. Used only in
71 parsing. Not safe later. */
72 char name[9]; /* Free-format: Name of variable. */
73 int rec; /* Fixed-format: Record number (1-based). */
74 int fc, lc; /* Fixed-format: Column numbers in record. */
75 struct fmt_spec input; /* Input format of this field. */
76 int fv; /* First value in case. */
77 int type; /* 0=numeric, >0=width of alpha field. */
80 /* Constants for DATA LIST type. */
81 /* Must match table in cmd_data_list(). */
89 /* DATA LIST private data structure. */
93 struct dls_var_spec *spec; /* Variable parsing specifications. */
94 struct file_handle *handle; /* Input file, never NULL. */
95 /* Do not reorder preceding fields. */
97 int type; /* A DLS_* constant. */
98 struct variable *end; /* Variable specified on END subcommand. */
99 int eof; /* End of file encountered. */
100 int nrec; /* Number of records. */
103 /* Holds information on parsing the data file. */
104 static struct data_list_pgm dls;
106 /* Pointer to a pointer to where the first dls_var_spec should go. */
107 static struct dls_var_spec **first;
109 /* Last dls_var_spec in the chain. Used for building the linked-list. */
110 static struct dls_var_spec *next;
112 static int parse_fixed (void);
113 static int parse_free (void);
114 static void dump_fixed_table (void);
115 static void dump_free_table (void);
116 static void destroy_dls (struct trns_header *);
117 static int read_one_case (struct trns_header *, struct ccase *);
119 /* Message title for REPEATING DATA. */
120 #define RPD_ERR "REPEATING DATA: "
125 /* 0=print no table, 1=print table. (TABLE subcommand.) */
128 lex_match_id ("DATA");
129 lex_match_id ("LIST");
131 if (vfm_source != &input_program_source
132 && vfm_source != &file_type_source)
133 discard_variables ();
135 dls.handle = default_handle;
146 if (lex_match_id ("FILE"))
149 dls.handle = fh_parse_file_handle ();
152 if (vfm_source == &file_type_source && dls.handle != default_handle)
154 msg (SE, _("DATA LIST may not use a different file from "
155 "that specified on its surrounding FILE TYPE."));
159 else if (lex_match_id ("RECORDS"))
163 if (!lex_force_int ())
165 dls.nrec = lex_integer ();
169 else if (lex_match_id ("END"))
173 msg (SE, _("The END subcommand may only be specified once."));
178 if (!lex_force_id ())
180 dls.end = find_variable (tokid);
182 dls.end = force_create_variable (&default_dict, tokid, NUMERIC, 0);
185 else if (token == T_ID)
187 /* Must match DLS_* constants. */
188 static const char *id[] = {"FIXED", "FREE", "LIST", "NOTABLE",
193 for (p = id; *p; p++)
194 if (lex_id_match (*p, tokid))
209 msg (SE, _("Only one of FIXED, FREE, or LIST may "
226 default_handle = dls.handle;
229 dls.type = DLS_FIXED;
233 if (dls.type == DLS_FREE)
239 if (dls.type == DLS_FIXED)
254 if (vfm_source != NULL)
256 struct data_list_pgm *new_pgm;
258 dls.h.proc = read_one_case;
259 dls.h.free = destroy_dls;
261 new_pgm = xmalloc (sizeof *new_pgm);
262 memcpy (new_pgm, &dls, sizeof *new_pgm);
263 add_transformation ((struct trns_header *) new_pgm);
266 vfm_source = &data_list_source;
272 append_var_spec (struct dls_var_spec *spec)
275 *first = next = xmalloc (sizeof *spec);
277 next = next->next = xmalloc (sizeof *spec);
279 memcpy (next, spec, sizeof *spec);
283 /* Fixed-format parsing. */
285 /* Used for chaining together fortran-like format specifiers. */
288 struct fmt_list *next;
291 struct fmt_list *down;
294 /* Used as "local" variables among the fixed-format parsing funcs. If
295 it were guaranteed that PSPP were going to be compiled by gcc,
296 I'd make all these functions a single set of nested functions. */
299 char **name; /* Variable names. */
300 int nname; /* Number of names. */
301 int cname; /* dump_fmt_list: index of next name to use. */
303 int recno; /* Index of current record. */
304 int sc; /* 1-based column number of starting column for
305 next field to output. */
307 struct dls_var_spec spec; /* Next specification to output. */
308 int fc, lc; /* First, last column in set of fields specified
311 int level; /* Nesting level in fixed_parse_fortran(). */
315 static int fixed_parse_compatible (void);
316 static struct fmt_list *fixed_parse_fortran (void);
328 while (lex_match ('/'))
331 if (lex_integer_p ())
333 if (lex_integer () < fx.recno)
335 msg (SE, _("The record number specified, %ld, is "
336 "before the previous record, %d. Data "
337 "fields must be listed in order of "
338 "increasing record number."),
339 lex_integer (), fx.recno - 1);
343 fx.recno = lex_integer ();
348 fx.spec.rec = fx.recno;
350 if (!parse_DATA_LIST_vars (&fx.name, &fx.nname, PV_NONE))
355 if (!fixed_parse_compatible ())
358 else if (token == '(')
362 if (!fixed_parse_fortran ())
367 msg (SE, _("SPSS-like or FORTRAN-like format "
368 "specification expected after variable names."));
372 for (i = 0; i < fx.nname; i++)
376 if (dls.nrec && next->rec > dls.nrec)
378 msg (SE, _("Variables are specified on records that "
379 "should not exist according to RECORDS subcommand."));
383 dls.nrec = next->rec;
386 lex_error (_("expecting end of command"));
392 for (i = 0; i < fx.nname; i++)
399 fixed_parse_compatible (void)
404 if (!lex_force_int ())
407 fx.fc = lex_integer ();
410 msg (SE, _("Column positions for fields must be positive."));
415 lex_negative_to_dash ();
418 if (!lex_force_int ())
420 fx.lc = lex_integer ();
423 msg (SE, _("Column positions for fields must be positive."));
426 else if (fx.lc < fx.fc)
428 msg (SE, _("The ending column for a field must be "
429 "greater than the starting column."));
438 fx.spec.input.w = fx.lc - fx.fc + 1;
441 struct fmt_desc *fdp;
447 fx.spec.input.type = parse_format_specifier_name (&cp, 0);
448 if (fx.spec.input.type == -1)
452 msg (SE, _("A format specifier on this line "
453 "has extra characters on the end."));
461 fx.spec.input.type = FMT_F;
463 if (lex_integer_p ())
465 if (lex_integer () < 1)
467 msg (SE, _("The value for number of decimal places "
468 "must be at least 1."));
472 fx.spec.input.d = lex_integer ();
478 fdp = &formats[fx.spec.input.type];
479 if (fdp->n_args < 2 && fx.spec.input.d)
481 msg (SE, _("Input format %s doesn't accept decimal places."),
486 if (fx.spec.input.d > 16)
487 fx.spec.input.d = 16;
489 if (!lex_force_match (')'))
494 fx.spec.input.type = FMT_F;
500 if ((fx.lc - fx.fc + 1) % fx.nname)
502 msg (SE, _("The %d columns %d-%d "
503 "can't be evenly divided into %d fields."),
504 fx.lc - fx.fc + 1, fx.fc, fx.lc, fx.nname);
508 dividend = (fx.lc - fx.fc + 1) / fx.nname;
509 fx.spec.input.w = dividend;
510 if (!check_input_specifier (&fx.spec.input))
513 for (i = 0; i < fx.nname; i++)
518 if (fx.spec.input.type == FMT_A || fx.spec.input.type == FMT_AHEX)
523 v = create_variable (&default_dict, fx.name[i], type, dividend);
526 convert_fmt_ItoO (&fx.spec.input, &v->print);
531 v = find_variable (fx.name[i]);
535 msg (SE, _("%s is a duplicate variable name."), fx.name[i]);
540 msg (SE, _("There is already a variable %s of a "
545 if (type == ALPHA && dividend != v->width)
547 msg (SE, _("There is already a string variable %s of a "
548 "different width."), fx.name[i]);
554 fx.spec.fc = fx.fc + dividend * i;
555 fx.spec.lc = fx.spec.fc + dividend - 1;
557 fx.spec.type = v->type == NUMERIC ? 0 : v->width;
558 append_var_spec (&fx.spec);
563 /* Destroy a format list and, optionally, all its sublists. */
565 destroy_fmt_list (struct fmt_list *f, int recurse)
567 struct fmt_list *next;
572 if (recurse && f->f.type == FMT_DESCEND)
573 destroy_fmt_list (f->down, 1);
578 /* Takes a hierarchically structured fmt_list F as constructed by
579 fixed_parse_fortran(), and flattens it into a linear list of
582 dump_fmt_list (struct fmt_list *f)
586 for (; f; f = f->next)
587 if (f->f.type == FMT_X)
589 else if (f->f.type == FMT_T)
591 else if (f->f.type == FMT_NEWREC)
593 fx.recno += f->count;
597 for (i = 0; i < f->count; i++)
598 if (f->f.type == FMT_DESCEND)
600 if (!dump_fmt_list (f->down))
608 type = (formats[f->f.type].cat & FCAT_STRING) ? ALPHA : NUMERIC;
609 if (fx.cname >= fx.nname)
611 msg (SE, _("The number of format "
612 "specifications exceeds the number of "
613 "variable names given."));
617 fx.spec.v = v = create_variable (&default_dict,
622 msg (SE, _("%s is a duplicate variable name."), fx.name[i]);
626 fx.spec.input = f->f;
627 convert_fmt_ItoO (&fx.spec.input, &v->print);
630 fx.spec.rec = fx.recno;
632 fx.spec.lc = fx.sc + f->f.w - 1;
634 fx.spec.type = v->type == NUMERIC ? 0 : v->width;
635 append_var_spec (&fx.spec);
642 /* Calls itself recursively to parse nested levels of parentheses.
643 Returns to its original caller: NULL, to indicate error; non-NULL,
644 but nothing useful, to indicate success (it returns a free()'d
646 static struct fmt_list *
647 fixed_parse_fortran (void)
649 struct fmt_list *head;
650 struct fmt_list *fl = NULL;
652 lex_get (); /* Skip opening parenthesis. */
656 fl = fl->next = xmalloc (sizeof *fl);
658 head = fl = xmalloc (sizeof *fl);
660 if (lex_integer_p ())
662 fl->count = lex_integer ();
670 fl->f.type = FMT_DESCEND;
672 fl->down = fixed_parse_fortran ();
677 else if (lex_match ('/'))
678 fl->f.type = FMT_NEWREC;
679 else if (!parse_format_specifier (&fl->f, 1)
680 || !check_input_specifier (&fl->f))
692 dump_fmt_list (head);
693 if (fx.cname < fx.nname)
695 msg (SE, _("There aren't enough format specifications "
696 "to match the number of variable names given."));
699 destroy_fmt_list (head, 1);
704 destroy_fmt_list (head, 0);
709 /* Displays a table giving information on fixed-format variable
710 parsing on DATA LIST. */
711 /* FIXME: The `Columns' column should be divided into three columns,
712 one for the starting column, one for the dash, one for the ending
713 column; then right-justify the starting column and left-justify the
716 dump_fixed_table (void)
718 struct dls_var_spec *spec;
721 const char *filename;
724 for (i = 0, spec = *first; spec; spec = spec->next)
726 t = tab_create (4, i + 1, 0);
727 tab_columns (t, TAB_COL_DOWN, 1);
728 tab_headers (t, 0, 0, 1, 0);
729 tab_text (t, 0, 0, TAB_CENTER | TAT_TITLE, _("Variable"));
730 tab_text (t, 1, 0, TAB_CENTER | TAT_TITLE, _("Record"));
731 tab_text (t, 2, 0, TAB_CENTER | TAT_TITLE, _("Columns"));
732 tab_text (t, 3, 0, TAB_CENTER | TAT_TITLE, _("Format"));
733 tab_box (t, TAL_1, TAL_1, TAL_0, TAL_1, 0, 0, 3, i);
734 tab_hline (t, TAL_2, 0, 3, 1);
735 tab_dim (t, tab_natural_dimensions);
737 for (i = 1, spec = *first; spec; spec = spec->next, i++)
739 tab_text (t, 0, i, TAB_LEFT, spec->v->name);
740 tab_text (t, 1, i, TAT_PRINTF, "%d", spec->rec);
741 tab_text (t, 2, i, TAT_PRINTF, "%3d-%3d",
743 tab_text (t, 3, i, TAB_LEFT | TAT_FIX,
744 fmt_to_string (&spec->input));
747 if (*first == dls.spec)
749 filename = fh_handle_name (dls.handle);
750 if (filename == NULL)
752 buf = local_alloc (strlen (filename) + INT_DIGITS + 80);
753 sprintf (buf, (dls.handle != inline_file
755 ngettext("Reading %d record from file %s.",
756 "Reading %d records from file %s.",dls.nrec)
758 ngettext("Reading %d record from the command file.",
759 "Reading %d records from the command file.",
765 buf = local_alloc (strlen (_("Occurrence data specifications.")) + 1);
766 strcpy (buf, _("Occurrence data specifications."));
769 tab_title (t, 0, buf);
771 fh_handle_name (NULL);
775 /* Free-format parsing. */
780 struct dls_var_spec spec;
781 struct fmt_spec in, out;
790 if (!parse_DATA_LIST_vars (&name, &nname, PV_NONE))
794 if (!parse_format_specifier (&in, 0) || !check_input_specifier (&in))
796 if (!lex_force_match (')'))
798 convert_fmt_ItoO (&in, &out);
810 if (in.type == FMT_A || in.type == FMT_AHEX)
814 for (i = 0; i < nname; i++)
818 spec.v = v = create_variable (&default_dict, name[i], type, in.w);
821 msg (SE, _("%s is a duplicate variable name."), name[i]);
825 v->print = v->write = out;
827 strcpy (spec.name, name[i]);
829 spec.type = type == NUMERIC ? 0 : v->width;
830 append_var_spec (&spec);
832 for (i = 0; i < nname; i++)
838 lex_error (_("expecting end of command"));
842 for (i = 0; i < nname; i++)
848 /* Displays a table giving information on free-format variable parsing
851 dump_free_table (void)
857 struct dls_var_spec *spec;
858 for (i = 0, spec = dls.spec; spec; spec = spec->next)
862 t = tab_create (2, i + 1, 0);
863 tab_columns (t, TAB_COL_DOWN, 1);
864 tab_headers (t, 0, 0, 1, 0);
865 tab_text (t, 0, 0, TAB_CENTER | TAT_TITLE, _("Variable"));
866 tab_text (t, 1, 0, TAB_CENTER | TAT_TITLE, _("Format"));
867 tab_box (t, TAL_1, TAL_1, TAL_0, TAL_1, 0, 0, 1, i);
868 tab_hline (t, TAL_2, 0, 1, 1);
869 tab_dim (t, tab_natural_dimensions);
872 struct dls_var_spec *spec;
874 for (i = 1, spec = dls.spec; spec; spec = spec->next, i++)
876 tab_text (t, 0, i, TAB_LEFT, spec->v->name);
877 tab_text (t, 1, i, TAB_LEFT | TAT_FIX, fmt_to_string (&spec->input));
882 const char *filename;
884 filename = fh_handle_name (dls.handle);
885 if (filename == NULL)
888 (dls.handle != inline_file
889 ? _("Reading free-form data from file %s.")
890 : _("Reading free-form data from the command file.")),
895 fh_handle_name (NULL);
898 /* Input procedure. */
900 /* Pointer to relevant parsing data. Static just to avoid passing it
902 static struct data_list_pgm *dlsp;
904 /* Extracts a field from the current position in the current record.
905 Fields can be unquoted or quoted with single- or double-quote
906 characters. *RET_LEN is set to the field length, *RET_CP is set to
907 the field itself. After parsing the field, sets the current
908 position in the record to just past the field. Returns 0 on
909 failure or a 1-based column number indicating the beginning of the
912 cut_field (char **ret_cp, int *ret_len)
917 cp = dfm_get_record (dlsp->handle, &len);
923 /* Skip leading whitespace and commas. */
924 while ((isspace ((unsigned char) *cp) || *cp == ',') && cp < ep)
929 /* Three types of fields: quoted with ', quoted with ", unquoted. */
930 if (*cp == '\'' || *cp == '"')
935 while (cp < ep && *cp != quote)
937 *ret_len = cp - *ret_cp;
941 msg (SW, _("Scope of string exceeds line."));
946 while (cp < ep && !isspace ((unsigned char) *cp) && *cp != ',')
948 *ret_len = cp - *ret_cp;
952 int beginning_column;
954 dfm_set_record (dlsp->handle, *ret_cp);
955 beginning_column = dfm_get_cur_col (dlsp->handle) + 1;
957 dfm_set_record (dlsp->handle, cp);
959 return beginning_column;
963 static int read_from_data_list_fixed (void);
964 static int read_from_data_list_free (void);
965 static int read_from_data_list_list (void);
966 static int do_reading (int flag);
968 /* FLAG==0: reads any number of cases into temp_case and calls
969 write_case() for each one, returns garbage. FLAG!=0: reads one
970 case into temp_case and returns -2 on eof, -1 otherwise.
971 Uses dlsp as the relevant parsing description. */
973 do_reading (int flag)
979 dfm_push (dlsp->handle);
984 func = read_from_data_list_fixed;
987 func = read_from_data_list_free;
990 func = read_from_data_list_list;
1002 msg (SE, _("Attempt to read past end of file."));
1011 if (dlsp->end != NULL)
1015 printf ("end of file, setting %s to 1\n", dlsp->end->name);
1016 temp_case->data[dlsp->end->fv].f = 1.0;
1021 printf ("not end of file, setting %s to 0\n", dlsp->end->name);
1022 temp_case->data[dlsp->end->fv].f = 0.0;
1028 while (func () != -2)
1031 debug_printf ((_("abort in write_case()\n")));
1034 fh_close_handle (dlsp->handle);
1036 dfm_pop (dlsp->handle);
1041 /* Reads a case from the data file and parses it according to
1042 fixed-format syntax rules. */
1044 read_from_data_list_fixed (void)
1046 struct dls_var_spec *var_spec = dlsp->spec;
1049 if (!dfm_get_record (dlsp->handle, NULL))
1051 for (i = 1; i <= dlsp->nrec; i++)
1054 char *line = dfm_get_record (dlsp->handle, &len);
1058 /* Note that this can't occur on the first record. */
1059 msg (SW, _("Partial case of %d of %d records discarded."),
1064 for (; var_spec && i == var_spec->rec; var_spec = var_spec->next)
1068 data_in_finite_line (&di, line, len, var_spec->fc, var_spec->lc);
1069 di.v = &temp_case->data[var_spec->fv];
1071 di.f1 = var_spec->fc;
1072 di.format = var_spec->input;
1077 dfm_fwd_record (dlsp->handle);
1083 /* Reads a case from the data file and parses it according to
1084 free-format syntax rules. */
1086 read_from_data_list_free (void)
1088 struct dls_var_spec *var_spec;
1092 for (var_spec = dlsp->spec; var_spec; var_spec = var_spec->next)
1096 /* Cut out a field and read in a new record if necessary. */
1099 column = cut_field (&field, &len);
1103 if (dfm_get_record (dlsp->handle, NULL))
1104 dfm_fwd_record (dlsp->handle);
1105 if (!dfm_get_record (dlsp->handle, NULL))
1107 if (var_spec != dlsp->spec)
1108 msg (SW, _("Partial case discarded. The first variable "
1109 "missing was %s."), var_spec->name);
1119 di.v = &temp_case->data[var_spec->fv];
1122 di.format = var_spec->input;
1129 /* Reads a case from the data file and parses it according to
1130 list-format syntax rules. */
1132 read_from_data_list_list (void)
1134 struct dls_var_spec *var_spec;
1138 if (!dfm_get_record (dlsp->handle, NULL))
1141 for (var_spec = dlsp->spec; var_spec; var_spec = var_spec->next)
1143 /* Cut out a field and check for end-of-line. */
1144 int column = cut_field (&field, &len);
1149 msg (SW, _("Missing value(s) for all variables from %s onward. "
1150 "These will be filled with the system-missing value "
1151 "or blanks, as appropriate."),
1153 for (; var_spec; var_spec = var_spec->next)
1154 if (!var_spec->type)
1155 temp_case->data[var_spec->fv].f = SYSMIS;
1157 memset (temp_case->data[var_spec->fv].s, ' ', var_spec->type);
1166 di.v = &temp_case->data[var_spec->fv];
1169 di.format = var_spec->input;
1174 dfm_fwd_record (dlsp->handle);
1178 /* Destroys DATA LIST transformation or input program PGM. */
1180 destroy_dls (struct trns_header *pgm)
1182 struct data_list_pgm *dls = (struct data_list_pgm *) pgm;
1183 struct dls_var_spec *iter, *next;
1192 fh_close_handle (dls->handle);
1195 /* Note that since this is exclusively an input program, C is
1196 guaranteed to be temp_case. */
1198 read_one_case (struct trns_header *t, struct ccase *c unused)
1200 dlsp = (struct data_list_pgm *) t;
1201 return do_reading (1);
1204 /* Reads all the records from the data file and passes them to
1207 data_list_source_read (void)
1213 /* Destroys the source's internal data. */
1215 data_list_source_destroy_source (void)
1217 destroy_dls ((struct trns_header *) & dls);
1220 struct case_stream data_list_source =
1223 data_list_source_read,
1226 data_list_source_destroy_source,
1231 /* REPEATING DATA. */
1233 /* Represents a number or a variable. */
1234 struct rpd_num_or_var
1236 int num; /* Value, or 0. */
1237 struct variable *var; /* Variable, if number==0. */
1240 /* REPEATING DATA private data structure. */
1241 struct repeating_data_trns
1243 struct trns_header h;
1244 struct dls_var_spec *spec; /* Variable parsing specifications. */
1245 struct file_handle *handle; /* Input file, never NULL. */
1246 /* Do not reorder preceding fields. */
1248 struct rpd_num_or_var starts_beg; /* STARTS=, before the dash. */
1249 struct rpd_num_or_var starts_end; /* STARTS=, after the dash. */
1250 struct rpd_num_or_var occurs; /* OCCURS= subcommand. */
1251 struct rpd_num_or_var length; /* LENGTH= subcommand. */
1252 struct rpd_num_or_var cont_beg; /* CONTINUED=, before the dash. */
1253 struct rpd_num_or_var cont_end; /* CONTINUED=, after the dash. */
1254 int id_beg, id_end; /* ID subcommand, beginning & end columns. */
1255 struct variable *id_var; /* ID subcommand, DATA LIST variable. */
1256 struct fmt_spec id_spec; /* ID subcommand, input format spec. */
1259 /* Information about the transformation being parsed. */
1260 static struct repeating_data_trns rpd;
1262 static int read_one_set_of_repetitions (struct trns_header *, struct ccase *);
1263 static int parse_num_or_var (struct rpd_num_or_var *, const char *);
1264 static int parse_repeating_data (void);
1265 static void find_variable_input_spec (struct variable *v,
1266 struct fmt_spec *spec);
1268 /* Parses the REPEATING DATA command. */
1270 cmd_repeating_data (void)
1272 /* 0=print no table, 1=print table. (TABLE subcommand.) */
1275 /* Bits are set when a particular subcommand has been seen. */
1278 lex_match_id ("REPEATING");
1279 lex_match_id ("DATA");
1281 assert (vfm_source == &input_program_source
1282 || vfm_source == &file_type_source);
1284 rpd.handle = default_handle;
1285 rpd.starts_beg.num = 0;
1286 rpd.starts_beg.var = NULL;
1287 rpd.starts_end = rpd.occurs = rpd.length = rpd.cont_beg
1288 = rpd.cont_end = rpd.starts_beg;
1289 rpd.id_beg = rpd.id_end = 0;
1299 if (lex_match_id ("FILE"))
1302 rpd.handle = fh_parse_file_handle ();
1305 if (rpd.handle != default_handle)
1307 msg (SE, _("REPEATING DATA must use the same file as its "
1308 "corresponding DATA LIST or FILE TYPE."));
1312 else if (lex_match_id ("STARTS"))
1317 msg (SE, _("STARTS subcommand given multiple times."));
1322 if (!parse_num_or_var (&rpd.starts_beg, "STARTS beginning column"))
1325 lex_negative_to_dash ();
1326 if (lex_match ('-'))
1328 if (!parse_num_or_var (&rpd.starts_end, "STARTS ending column"))
1331 /* Otherwise, rpd.starts_end is left uninitialized.
1332 This is okay. We will initialize it later from the
1333 record length of the file. We can't do this now
1334 because we can't be sure that the user has specified
1335 the file handle yet. */
1338 if (rpd.starts_beg.num != 0 && rpd.starts_end.num != 0
1339 && rpd.starts_beg.num > rpd.starts_end.num)
1341 msg (SE, _("STARTS beginning column (%d) exceeds "
1342 "STARTS ending column (%d)."),
1343 rpd.starts_beg.num, rpd.starts_end.num);
1347 else if (lex_match_id ("OCCURS"))
1352 msg (SE, _("OCCURS subcommand given multiple times."));
1357 if (!parse_num_or_var (&rpd.occurs, "OCCURS"))
1360 else if (lex_match_id ("LENGTH"))
1365 msg (SE, _("LENGTH subcommand given multiple times."));
1370 if (!parse_num_or_var (&rpd.length, "LENGTH"))
1373 else if (lex_match_id ("CONTINUED"))
1378 msg (SE, _("CONTINUED subcommand given multiple times."));
1383 if (!lex_match ('/'))
1385 if (!parse_num_or_var (&rpd.cont_beg, "CONTINUED beginning column"))
1388 lex_negative_to_dash ();
1390 && !parse_num_or_var (&rpd.cont_end,
1391 "CONTINUED ending column"))
1394 if (rpd.cont_beg.num != 0 && rpd.cont_end.num != 0
1395 && rpd.cont_beg.num > rpd.cont_end.num)
1397 msg (SE, _("CONTINUED beginning column (%d) exceeds "
1398 "CONTINUED ending column (%d)."),
1399 rpd.cont_beg.num, rpd.cont_end.num);
1404 rpd.cont_beg.num = 1;
1406 else if (lex_match_id ("ID"))
1411 msg (SE, _("ID subcommand given multiple times."));
1416 if (!lex_force_int ())
1418 if (lex_integer () < 1)
1420 msg (SE, _("ID beginning column (%ld) must be positive."),
1424 rpd.id_beg = lex_integer ();
1427 lex_negative_to_dash ();
1429 if (lex_match ('-'))
1431 if (!lex_force_int ())
1433 if (lex_integer () < 1)
1435 msg (SE, _("ID ending column (%ld) must be positive."),
1439 if (lex_integer () < rpd.id_end)
1441 msg (SE, _("ID ending column (%ld) cannot be less than "
1442 "ID beginning column (%d)."),
1443 lex_integer (), rpd.id_beg);
1447 rpd.id_end = lex_integer ();
1450 else rpd.id_end = rpd.id_beg;
1452 if (!lex_force_match ('='))
1454 rpd.id_var = parse_variable ();
1455 if (rpd.id_var == NULL)
1458 find_variable_input_spec (rpd.id_var, &rpd.id_spec);
1460 else if (lex_match_id ("TABLE"))
1462 else if (lex_match_id ("NOTABLE"))
1464 else if (lex_match_id ("DATA"))
1472 if (!lex_force_match ('/'))
1476 /* Comes here when DATA specification encountered. */
1477 if ((seen & (1 | 2)) != (1 | 2))
1479 if ((seen & 1) == 0)
1480 msg (SE, _("Missing required specification STARTS."));
1481 if ((seen & 2) == 0)
1482 msg (SE, _("Missing required specification OCCURS."));
1486 /* Enforce ID restriction. */
1487 if ((seen & 16) && !(seen & 8))
1489 msg (SE, _("ID specified without CONTINUED."));
1493 /* Calculate starts_end, cont_end if necessary. */
1494 if (rpd.starts_end.num == 0 && rpd.starts_end.var == NULL)
1495 rpd.starts_end.num = fh_record_width (rpd.handle);
1496 if (rpd.cont_end.num == 0 && rpd.starts_end.var == NULL)
1497 rpd.cont_end.num = fh_record_width (rpd.handle);
1499 /* Calculate length if possible. */
1500 if ((seen & 4) == 0)
1502 struct dls_var_spec *iter;
1504 for (iter = rpd.spec; iter; iter = iter->next)
1506 if (iter->lc > rpd.length.num)
1507 rpd.length.num = iter->lc;
1509 assert (rpd.length.num != 0);
1513 if (!parse_repeating_data ())
1517 dump_fixed_table ();
1520 struct repeating_data_trns *new_trns;
1522 rpd.h.proc = read_one_set_of_repetitions;
1523 rpd.h.free = destroy_dls;
1525 new_trns = xmalloc (sizeof *new_trns);
1526 memcpy (new_trns, &rpd, sizeof *new_trns);
1527 add_transformation ((struct trns_header *) new_trns);
1530 return lex_end_of_command ();
1533 /* Because of the way that DATA LIST is structured, it's not trivial
1534 to determine what input format is associated with a given variable.
1535 This function finds the input format specification for variable V
1536 and puts it in SPEC. */
1538 find_variable_input_spec (struct variable *v, struct fmt_spec *spec)
1542 for (i = 0; i < n_trns; i++)
1544 struct data_list_pgm *pgm = (struct data_list_pgm *) t_trns[i];
1546 if (pgm->h.proc == read_one_case)
1548 struct dls_var_spec *iter;
1550 for (iter = pgm->spec; iter; iter = iter->next)
1553 *spec = iter->input;
1562 /* Parses a number or a variable name from the syntax file and puts
1563 the results in VALUE. Ensures that the number is at least 1; else
1564 emits an error based on MESSAGE. Returns nonzero only if
1567 parse_num_or_var (struct rpd_num_or_var *value, const char *message)
1572 value->var = parse_variable ();
1573 if (value->var == NULL)
1575 if (value->var->type == ALPHA)
1577 msg (SE, _("String variable not allowed here."));
1581 else if (lex_integer_p ())
1583 value->num = lex_integer ();
1587 msg (SE, _("%s (%d) must be at least 1."), message, value->num);
1593 msg (SE, _("Variable or integer expected for %s."), message);
1599 /* Parses data specifications for repeating data groups. Taken from
1600 parse_fixed(). Returns nonzero only if successful. */
1602 parse_repeating_data (void)
1609 while (token != '.')
1611 fx.spec.rec = fx.recno;
1613 if (!parse_DATA_LIST_vars (&fx.name, &fx.nname, PV_NONE))
1618 if (!fixed_parse_compatible ())
1621 else if (token == '(')
1625 if (!fixed_parse_fortran ())
1630 msg (SE, _("SPSS-like or FORTRAN-like format "
1631 "specification expected after variable names."));
1635 for (i = 0; i < fx.nname; i++)
1641 lex_error (_("expecting end of command"));
1648 for (i = 0; i < fx.nname; i++)
1654 /* Obtains the real value for rpd_num_or_var N in case C and returns
1655 it. The valid range is nonnegative numbers, but numbers outside
1656 this range can be returned and should be handled by the caller as
1659 realize_value (struct rpd_num_or_var *n, struct ccase *c)
1664 assert (n->num == 0);
1667 double v = c->data[n->var->fv].f;
1669 if (v == SYSMIS || v <= INT_MIN || v >= INT_MAX)
1678 /* Parses one record of repeated data and outputs corresponding cases.
1679 Repeating data is present in line LINE having length LEN.
1680 Repeating data begins in column BEG and continues through column
1681 END inclusive (1-based columns); occurrences are offset OFS columns
1682 from each other. C is the case that will be filled in; T is the
1683 REPEATING DATA transformation. The record ID will be verified if
1684 COMPARE_ID is nonzero; if it is zero, then the record ID is
1685 initialized to the ID present in the case (assuming that ID
1686 location was specified by the user). Returns number of occurrences
1687 parsed up to the specified maximum of MAX_OCCURS. */
1689 rpd_parse_record (int beg, int end, int ofs, struct ccase *c,
1690 struct repeating_data_trns *t,
1691 char *line, int len, int compare_id, int max_occurs)
1696 /* Handle record ID values. */
1699 static union value comparator;
1705 data_in_finite_line (&di, line, len, t->id_beg, t->id_end);
1709 di.format = t->id_spec;
1715 if (compare_id == 0)
1717 else if ((t->id_var->type == NUMERIC && comparator.f != v.f)
1718 || (t->id_var->type == ALPHA
1719 && strncmp (comparator.s, v.s, t->id_var->width)))
1724 if (!data_out (comp_str, &t->id_var->print, &comparator))
1726 if (!data_out (v_str, &t->id_var->print, &v))
1729 comp_str[t->id_var->print.w] = v_str[t->id_var->print.w] = 0;
1732 _("Mismatched case ID (%s). Expected value was %s."),
1739 /* Iterate over the set of expected occurrences and record each of
1740 them as a separate case. FIXME: We need to execute any
1741 transformations that follow the current one. */
1745 for (occurrences = 0; occurrences < max_occurs; )
1747 if (cur + ofs > end + 1)
1752 struct dls_var_spec *var_spec = t->spec;
1754 for (; var_spec; var_spec = var_spec->next)
1756 int fc = var_spec->fc - 1 + cur;
1757 int lc = var_spec->lc - 1 + cur;
1759 if (fc > len && !warned && var_spec->input.type != FMT_A)
1764 _("Variable %s startging in column %d extends "
1765 "beyond physical record length of %d."),
1766 var_spec->v->name, fc, len);
1772 data_in_finite_line (&di, line, len, fc, lc);
1773 di.v = &c->data[var_spec->fv];
1776 di.format = var_spec->input;
1794 /* Analogous to read_one_case; reads one set of repetitions of the
1795 elements in the REPEATING DATA structure. Returns -1 on success,
1796 -2 on end of file or on failure. */
1798 read_one_set_of_repetitions (struct trns_header *trns, struct ccase *c)
1800 dfm_push (dlsp->handle);
1803 struct repeating_data_trns *t = (struct repeating_data_trns *) trns;
1805 char *line; /* Current record. */
1806 int len; /* Length of current record. */
1808 int starts_beg; /* Starting column. */
1809 int starts_end; /* Ending column. */
1810 int occurs; /* Number of repetitions. */
1811 int length; /* Length of each occurrence. */
1812 int cont_beg; /* Starting column for continuation lines. */
1813 int cont_end; /* Ending column for continuation lines. */
1815 int occurs_left; /* Number of occurrences remaining. */
1817 int code; /* Return value from rpd_parse_record(). */
1819 int skip_first_record = 0;
1821 /* Read the current record. */
1822 dfm_bkwd_record (dlsp->handle, 1);
1823 line = dfm_get_record (dlsp->handle, &len);
1826 dfm_fwd_record (dlsp->handle);
1828 /* Calculate occurs, length. */
1829 occurs_left = occurs = realize_value (&t->occurs, c);
1832 tmsg (SE, RPD_ERR, _("Invalid value %d for OCCURS."), occurs);
1835 starts_beg = realize_value (&t->starts_beg, c);
1836 if (starts_beg <= 0)
1838 tmsg (SE, RPD_ERR, _("Beginning column for STARTS (%d) must be "
1843 starts_end = realize_value (&t->starts_end, c);
1844 if (starts_end < starts_beg)
1846 tmsg (SE, RPD_ERR, _("Ending column for STARTS (%d) is less than "
1847 "beginning column (%d)."),
1848 starts_end, starts_beg);
1849 skip_first_record = 1;
1851 length = realize_value (&t->length, c);
1854 tmsg (SE, RPD_ERR, _("Invalid value %d for LENGTH."), length);
1856 occurs = occurs_left = 1;
1858 cont_beg = realize_value (&t->cont_beg, c);
1861 tmsg (SE, RPD_ERR, _("Beginning column for CONTINUED (%d) must be "
1866 cont_end = realize_value (&t->cont_end, c);
1867 if (cont_end < cont_beg)
1869 tmsg (SE, RPD_ERR, _("Ending column for CONTINUED (%d) is less than "
1870 "beginning column (%d)."),
1871 cont_end, cont_beg);
1875 /* Parse the first record. */
1876 if (!skip_first_record)
1878 code = rpd_parse_record (starts_beg, starts_end, length, c, t, line,
1879 len, 0, occurs_left);
1883 else if (cont_beg == 0)
1886 /* Make sure, if some occurrences are left, that we have
1887 continuation records. */
1888 occurs_left -= code;
1889 if (occurs_left != 0 && cont_beg == 0)
1892 _("Number of repetitions specified on OCCURS (%d) "
1893 "exceed number of repetitions available in "
1894 "space on STARTS (%d), and CONTINUED not specified."),
1899 /* Go on to additional records. */
1900 while (occurs_left != 0)
1902 assert (occurs_left >= 0);
1904 /* Read in another record. */
1905 line = dfm_get_record (dlsp->handle, &len);
1909 _("Unexpected end of file with %d repetitions "
1910 "remaining out of %d."),
1911 occurs_left, occurs);
1914 dfm_fwd_record (dlsp->handle);
1916 /* Parse this record. */
1917 code = rpd_parse_record (cont_beg, cont_end, length, c, t, line,
1918 len, 1, occurs_left);
1921 occurs_left -= code;
1925 dfm_pop (dlsp->handle);
1927 /* FIXME: This is a kluge until we've implemented multiplexing of