1 /* PSPP - computes sample statistics.
2 Copyright (C) 1997-9, 2000 Free Software Foundation, Inc.
3 Written by Ben Pfaff <blp@gnu.org>.
5 This program is free software; you can redistribute it and/or
6 modify it under the terms of the GNU General Public License as
7 published by the Free Software Foundation; either version 2 of the
8 License, or (at your option) any later version.
10 This program is distributed in the hope that it will be useful, but
11 WITHOUT ANY WARRANTY; without even the implied warranty of
12 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
13 General Public License for more details.
15 You should have received a copy of the GNU General Public License
16 along with this program; if not, write to the Free Software
17 Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA
21 #include "data-list.h"
30 #include "debug-print.h"
33 #include "file-handle.h"
43 /* Utility function. */
45 /* FIXME: Either REPEATING DATA must be the last transformation, or we
46 must multiplex the transformations that follow (i.e., perform them
47 for every case that we produce from a repetition instance).
48 Currently we do neither. We should do one or the other. */
50 /* Describes how to parse one variable. */
53 struct dls_var_spec *next;
54 struct variable *v; /* Associated variable. Used only in
55 parsing. Not safe later. */
56 char name[9]; /* Free-format: Name of variable. */
57 int rec; /* Fixed-format: Record number (1-based). */
58 int fc, lc; /* Fixed-format: Column numbers in record. */
59 struct fmt_spec input; /* Input format of this field. */
60 int fv; /* First value in case. */
61 int width; /* 0=numeric, >0=width of alpha field. */
64 /* Constants for DATA LIST type. */
65 /* Must match table in cmd_data_list(). */
73 /* DATA LIST private data structure. */
77 struct dls_var_spec *spec; /* Variable parsing specifications. */
78 struct file_handle *handle; /* Input file, never NULL. */
79 /* Do not reorder preceding fields. */
81 int type; /* A DLS_* constant. */
82 struct variable *end; /* Variable specified on END subcommand. */
83 int eof; /* End of file encountered. */
84 int nrec; /* Number of records. */
87 /* Holds information on parsing the data file. */
88 static struct data_list_pgm dls;
90 /* Pointer to a pointer to where the first dls_var_spec should go. */
91 static struct dls_var_spec **first;
93 /* Last dls_var_spec in the chain. Used for building the linked-list. */
94 static struct dls_var_spec *next;
96 static int parse_fixed (void);
97 static int parse_free (void);
98 static void dump_fixed_table (void);
99 static void dump_free_table (void);
100 static void destroy_dls (struct trns_header *);
101 static int read_one_case (struct trns_header *, struct ccase *);
103 /* Message title for REPEATING DATA. */
104 #define RPD_ERR "REPEATING DATA: "
109 /* 0=print no table, 1=print table. (TABLE subcommand.) */
112 lex_match_id ("DATA");
113 lex_match_id ("LIST");
115 if (vfm_source != &input_program_source
116 && vfm_source != &file_type_source)
117 discard_variables ();
119 dls.handle = default_handle;
130 if (lex_match_id ("FILE"))
133 dls.handle = fh_parse_file_handle ();
136 if (vfm_source == &file_type_source && dls.handle != default_handle)
138 msg (SE, _("DATA LIST may not use a different file from "
139 "that specified on its surrounding FILE TYPE."));
143 else if (lex_match_id ("RECORDS"))
147 if (!lex_force_int ())
149 dls.nrec = lex_integer ();
153 else if (lex_match_id ("END"))
157 msg (SE, _("The END subcommand may only be specified once."));
162 if (!lex_force_id ())
164 dls.end = dict_lookup_var (default_dict, tokid);
166 dls.end = dict_create_var_assert (default_dict, tokid, 0);
169 else if (token == T_ID)
171 /* Must match DLS_* constants. */
172 static const char *id[] = {"FIXED", "FREE", "LIST", "NOTABLE",
177 for (p = id; *p; p++)
178 if (lex_id_match (*p, tokid))
193 msg (SE, _("Only one of FIXED, FREE, or LIST may "
210 default_handle = dls.handle;
213 dls.type = DLS_FIXED;
217 if (dls.type == DLS_FREE)
223 if (dls.type == DLS_FIXED)
238 if (vfm_source != NULL)
240 struct data_list_pgm *new_pgm;
242 dls.h.proc = read_one_case;
243 dls.h.free = destroy_dls;
245 new_pgm = xmalloc (sizeof *new_pgm);
246 memcpy (new_pgm, &dls, sizeof *new_pgm);
247 add_transformation ((struct trns_header *) new_pgm);
250 vfm_source = &data_list_source;
256 append_var_spec (struct dls_var_spec *spec)
259 *first = next = xmalloc (sizeof *spec);
261 next = next->next = xmalloc (sizeof *spec);
263 memcpy (next, spec, sizeof *spec);
267 /* Fixed-format parsing. */
269 /* Used for chaining together fortran-like format specifiers. */
272 struct fmt_list *next;
275 struct fmt_list *down;
278 /* Used as "local" variables among the fixed-format parsing funcs. If
279 it were guaranteed that PSPP were going to be compiled by gcc,
280 I'd make all these functions a single set of nested functions. */
283 char **name; /* Variable names. */
284 int nname; /* Number of names. */
285 int cname; /* dump_fmt_list: index of next name to use. */
287 int recno; /* Index of current record. */
288 int sc; /* 1-based column number of starting column for
289 next field to output. */
291 struct dls_var_spec spec; /* Next specification to output. */
292 int fc, lc; /* First, last column in set of fields specified
295 int level; /* Nesting level in fixed_parse_fortran(). */
299 static int fixed_parse_compatible (void);
300 static struct fmt_list *fixed_parse_fortran (void);
312 while (lex_match ('/'))
315 if (lex_integer_p ())
317 if (lex_integer () < fx.recno)
319 msg (SE, _("The record number specified, %ld, is "
320 "before the previous record, %d. Data "
321 "fields must be listed in order of "
322 "increasing record number."),
323 lex_integer (), fx.recno - 1);
327 fx.recno = lex_integer ();
332 fx.spec.rec = fx.recno;
334 if (!parse_DATA_LIST_vars (&fx.name, &fx.nname, PV_NONE))
339 if (!fixed_parse_compatible ())
342 else if (token == '(')
346 if (!fixed_parse_fortran ())
351 msg (SE, _("SPSS-like or FORTRAN-like format "
352 "specification expected after variable names."));
356 for (i = 0; i < fx.nname; i++)
360 if (dls.nrec && next->rec > dls.nrec)
362 msg (SE, _("Variables are specified on records that "
363 "should not exist according to RECORDS subcommand."));
367 dls.nrec = next->rec;
370 lex_error (_("expecting end of command"));
376 for (i = 0; i < fx.nname; i++)
383 fixed_parse_compatible (void)
388 if (!lex_force_int ())
391 fx.fc = lex_integer ();
394 msg (SE, _("Column positions for fields must be positive."));
399 lex_negative_to_dash ();
402 if (!lex_force_int ())
404 fx.lc = lex_integer ();
407 msg (SE, _("Column positions for fields must be positive."));
410 else if (fx.lc < fx.fc)
412 msg (SE, _("The ending column for a field must be "
413 "greater than the starting column."));
422 fx.spec.input.w = fx.lc - fx.fc + 1;
425 struct fmt_desc *fdp;
431 fx.spec.input.type = parse_format_specifier_name (&cp, 0);
432 if (fx.spec.input.type == -1)
436 msg (SE, _("A format specifier on this line "
437 "has extra characters on the end."));
445 fx.spec.input.type = FMT_F;
447 if (lex_integer_p ())
449 if (lex_integer () < 1)
451 msg (SE, _("The value for number of decimal places "
452 "must be at least 1."));
456 fx.spec.input.d = lex_integer ();
462 fdp = &formats[fx.spec.input.type];
463 if (fdp->n_args < 2 && fx.spec.input.d)
465 msg (SE, _("Input format %s doesn't accept decimal places."),
470 if (fx.spec.input.d > 16)
471 fx.spec.input.d = 16;
473 if (!lex_force_match (')'))
478 fx.spec.input.type = FMT_F;
484 if ((fx.lc - fx.fc + 1) % fx.nname)
486 msg (SE, _("The %d columns %d-%d "
487 "can't be evenly divided into %d fields."),
488 fx.lc - fx.fc + 1, fx.fc, fx.lc, fx.nname);
492 dividend = (fx.lc - fx.fc + 1) / fx.nname;
493 fx.spec.input.w = dividend;
494 if (!check_input_specifier (&fx.spec.input))
497 for (i = 0; i < fx.nname; i++)
503 if (fx.spec.input.type == FMT_A || fx.spec.input.type == FMT_AHEX)
514 v = dict_create_var (default_dict, fx.name[i], width);
517 convert_fmt_ItoO (&fx.spec.input, &v->print);
519 if (vfm_source != &input_program_source
520 && vfm_source != &file_type_source)
525 v = dict_lookup_var_assert (default_dict, fx.name[i]);
528 msg (SE, _("%s is a duplicate variable name."), fx.name[i]);
533 msg (SE, _("There is already a variable %s of a "
538 if (type == ALPHA && dividend != v->width)
540 msg (SE, _("There is already a string variable %s of a "
541 "different width."), fx.name[i]);
547 fx.spec.fc = fx.fc + dividend * i;
548 fx.spec.lc = fx.spec.fc + dividend - 1;
550 fx.spec.width = v->width;
551 append_var_spec (&fx.spec);
556 /* Destroy a format list and, optionally, all its sublists. */
558 destroy_fmt_list (struct fmt_list *f, int recurse)
560 struct fmt_list *next;
565 if (recurse && f->f.type == FMT_DESCEND)
566 destroy_fmt_list (f->down, 1);
571 /* Takes a hierarchically structured fmt_list F as constructed by
572 fixed_parse_fortran(), and flattens it into a linear list of
575 dump_fmt_list (struct fmt_list *f)
579 for (; f; f = f->next)
580 if (f->f.type == FMT_X)
582 else if (f->f.type == FMT_T)
584 else if (f->f.type == FMT_NEWREC)
586 fx.recno += f->count;
590 for (i = 0; i < f->count; i++)
591 if (f->f.type == FMT_DESCEND)
593 if (!dump_fmt_list (f->down))
602 if (formats[f->f.type].cat & FCAT_STRING)
612 if (fx.cname >= fx.nname)
614 msg (SE, _("The number of format "
615 "specifications exceeds the number of "
616 "variable names given."));
620 fx.spec.v = v = dict_create_var (default_dict,
625 msg (SE, _("%s is a duplicate variable name."), fx.name[i]);
629 if (vfm_source != &input_program_source
630 && vfm_source != &file_type_source)
633 fx.spec.input = f->f;
634 convert_fmt_ItoO (&fx.spec.input, &v->print);
637 fx.spec.rec = fx.recno;
639 fx.spec.lc = fx.sc + f->f.w - 1;
641 fx.spec.width = v->width;
642 append_var_spec (&fx.spec);
649 /* Calls itself recursively to parse nested levels of parentheses.
650 Returns to its original caller: NULL, to indicate error; non-NULL,
651 but nothing useful, to indicate success (it returns a free()'d
653 static struct fmt_list *
654 fixed_parse_fortran (void)
656 struct fmt_list *head;
657 struct fmt_list *fl = NULL;
659 lex_get (); /* Skip opening parenthesis. */
663 fl = fl->next = xmalloc (sizeof *fl);
665 head = fl = xmalloc (sizeof *fl);
667 if (lex_integer_p ())
669 fl->count = lex_integer ();
677 fl->f.type = FMT_DESCEND;
679 fl->down = fixed_parse_fortran ();
684 else if (lex_match ('/'))
685 fl->f.type = FMT_NEWREC;
686 else if (!parse_format_specifier (&fl->f, 1)
687 || !check_input_specifier (&fl->f))
699 dump_fmt_list (head);
700 if (fx.cname < fx.nname)
702 msg (SE, _("There aren't enough format specifications "
703 "to match the number of variable names given."));
706 destroy_fmt_list (head, 1);
711 destroy_fmt_list (head, 0);
716 /* Displays a table giving information on fixed-format variable
717 parsing on DATA LIST. */
718 /* FIXME: The `Columns' column should be divided into three columns,
719 one for the starting column, one for the dash, one for the ending
720 column; then right-justify the starting column and left-justify the
723 dump_fixed_table (void)
725 struct dls_var_spec *spec;
728 const char *filename;
731 for (i = 0, spec = *first; spec; spec = spec->next)
733 t = tab_create (4, i + 1, 0);
734 tab_columns (t, TAB_COL_DOWN, 1);
735 tab_headers (t, 0, 0, 1, 0);
736 tab_text (t, 0, 0, TAB_CENTER | TAT_TITLE, _("Variable"));
737 tab_text (t, 1, 0, TAB_CENTER | TAT_TITLE, _("Record"));
738 tab_text (t, 2, 0, TAB_CENTER | TAT_TITLE, _("Columns"));
739 tab_text (t, 3, 0, TAB_CENTER | TAT_TITLE, _("Format"));
740 tab_box (t, TAL_1, TAL_1, TAL_0, TAL_1, 0, 0, 3, i);
741 tab_hline (t, TAL_2, 0, 3, 1);
742 tab_dim (t, tab_natural_dimensions);
744 for (i = 1, spec = *first; spec; spec = spec->next, i++)
746 tab_text (t, 0, i, TAB_LEFT, spec->v->name);
747 tab_text (t, 1, i, TAT_PRINTF, "%d", spec->rec);
748 tab_text (t, 2, i, TAT_PRINTF, "%3d-%3d",
750 tab_text (t, 3, i, TAB_LEFT | TAT_FIX,
751 fmt_to_string (&spec->input));
754 if (*first == dls.spec)
756 filename = fh_handle_name (dls.handle);
757 if (filename == NULL)
759 buf = local_alloc (strlen (filename) + INT_DIGITS + 80);
760 sprintf (buf, (dls.handle != inline_file
762 ngettext("Reading %d record from file %s.",
763 "Reading %d records from file %s.",dls.nrec)
765 ngettext("Reading %d record from the command file.",
766 "Reading %d records from the command file.",
772 buf = local_alloc (strlen (_("Occurrence data specifications.")) + 1);
773 strcpy (buf, _("Occurrence data specifications."));
776 tab_title (t, 0, buf);
778 fh_handle_name (NULL);
782 /* Free-format parsing. */
787 struct dls_var_spec spec;
788 struct fmt_spec in, out;
798 if (!parse_DATA_LIST_vars (&name, &nname, PV_NONE))
802 if (!parse_format_specifier (&in, 0) || !check_input_specifier (&in))
804 if (!lex_force_match (')'))
806 convert_fmt_ItoO (&in, &out);
818 if (in.type == FMT_A || in.type == FMT_AHEX)
822 for (i = 0; i < nname; i++)
826 spec.v = v = dict_create_var (default_dict, name[i], width);
829 msg (SE, _("%s is a duplicate variable name."), name[i]);
833 v->print = v->write = out;
835 if (vfm_source != &input_program_source
836 && vfm_source != &file_type_source)
839 strcpy (spec.name, name[i]);
842 append_var_spec (&spec);
844 for (i = 0; i < nname; i++)
850 lex_error (_("expecting end of command"));
854 for (i = 0; i < nname; i++)
860 /* Displays a table giving information on free-format variable parsing
863 dump_free_table (void)
869 struct dls_var_spec *spec;
870 for (i = 0, spec = dls.spec; spec; spec = spec->next)
874 t = tab_create (2, i + 1, 0);
875 tab_columns (t, TAB_COL_DOWN, 1);
876 tab_headers (t, 0, 0, 1, 0);
877 tab_text (t, 0, 0, TAB_CENTER | TAT_TITLE, _("Variable"));
878 tab_text (t, 1, 0, TAB_CENTER | TAT_TITLE, _("Format"));
879 tab_box (t, TAL_1, TAL_1, TAL_0, TAL_1, 0, 0, 1, i);
880 tab_hline (t, TAL_2, 0, 1, 1);
881 tab_dim (t, tab_natural_dimensions);
884 struct dls_var_spec *spec;
886 for (i = 1, spec = dls.spec; spec; spec = spec->next, i++)
888 tab_text (t, 0, i, TAB_LEFT, spec->v->name);
889 tab_text (t, 1, i, TAB_LEFT | TAT_FIX, fmt_to_string (&spec->input));
894 const char *filename;
896 filename = fh_handle_name (dls.handle);
897 if (filename == NULL)
900 (dls.handle != inline_file
901 ? _("Reading free-form data from file %s.")
902 : _("Reading free-form data from the command file.")),
907 fh_handle_name (NULL);
910 /* Input procedure. */
912 /* Pointer to relevant parsing data. Static just to avoid passing it
914 static struct data_list_pgm *dlsp;
916 /* Extracts a field from the current position in the current record.
917 Fields can be unquoted or quoted with single- or double-quote
918 characters. *RET_LEN is set to the field length, *RET_CP is set to
919 the field itself. After parsing the field, sets the current
920 position in the record to just past the field. Returns 0 on
921 failure or a 1-based column number indicating the beginning of the
924 cut_field (char **ret_cp, int *ret_len)
929 cp = dfm_get_record (dlsp->handle, &len);
935 /* Skip leading whitespace and commas. */
936 while ((isspace ((unsigned char) *cp) || *cp == ',') && cp < ep)
941 /* Three types of fields: quoted with ', quoted with ", unquoted. */
942 if (*cp == '\'' || *cp == '"')
947 while (cp < ep && *cp != quote)
949 *ret_len = cp - *ret_cp;
953 msg (SW, _("Scope of string exceeds line."));
958 while (cp < ep && !isspace ((unsigned char) *cp) && *cp != ',')
960 *ret_len = cp - *ret_cp;
964 int beginning_column;
966 dfm_set_record (dlsp->handle, *ret_cp);
967 beginning_column = dfm_get_cur_col (dlsp->handle) + 1;
969 dfm_set_record (dlsp->handle, cp);
971 return beginning_column;
975 static int read_from_data_list_fixed (void);
976 static int read_from_data_list_free (void);
977 static int read_from_data_list_list (void);
979 /* FLAG==0: reads any number of cases into temp_case and calls
980 write_case() for each one, returns garbage. FLAG!=0: reads one
981 case into temp_case and returns -2 on eof, -1 otherwise.
982 Uses dlsp as the relevant parsing description. */
984 do_reading (int flag, write_case_func *write_case, write_case_data wc_data)
990 dfm_push (dlsp->handle);
995 func = read_from_data_list_fixed;
998 func = read_from_data_list_free;
1001 func = read_from_data_list_list;
1013 msg (SE, _("Attempt to read past end of file."));
1022 if (dlsp->end != NULL)
1026 printf ("end of file, setting %s to 1\n", dlsp->end->name);
1027 temp_case->data[dlsp->end->fv].f = 1.0;
1032 printf ("not end of file, setting %s to 0\n", dlsp->end->name);
1033 temp_case->data[dlsp->end->fv].f = 0.0;
1039 while (func () != -2)
1040 if (!write_case (wc_data))
1042 debug_printf ((_("abort in write_case()\n")));
1045 fh_close_handle (dlsp->handle);
1047 dfm_pop (dlsp->handle);
1052 /* Reads a case from the data file and parses it according to
1053 fixed-format syntax rules. */
1055 read_from_data_list_fixed (void)
1057 struct dls_var_spec *var_spec = dlsp->spec;
1060 if (!dfm_get_record (dlsp->handle, NULL))
1062 for (i = 1; i <= dlsp->nrec; i++)
1065 char *line = dfm_get_record (dlsp->handle, &len);
1069 /* Note that this can't occur on the first record. */
1070 msg (SW, _("Partial case of %d of %d records discarded."),
1075 for (; var_spec && i == var_spec->rec; var_spec = var_spec->next)
1079 data_in_finite_line (&di, line, len, var_spec->fc, var_spec->lc);
1080 di.v = &temp_case->data[var_spec->fv];
1082 di.f1 = var_spec->fc;
1083 di.format = var_spec->input;
1088 dfm_fwd_record (dlsp->handle);
1094 /* Reads a case from the data file and parses it according to
1095 free-format syntax rules. */
1097 read_from_data_list_free (void)
1099 struct dls_var_spec *var_spec;
1103 for (var_spec = dlsp->spec; var_spec; var_spec = var_spec->next)
1107 /* Cut out a field and read in a new record if necessary. */
1110 column = cut_field (&field, &len);
1114 if (dfm_get_record (dlsp->handle, NULL))
1115 dfm_fwd_record (dlsp->handle);
1116 if (!dfm_get_record (dlsp->handle, NULL))
1118 if (var_spec != dlsp->spec)
1119 msg (SW, _("Partial case discarded. The first variable "
1120 "missing was %s."), var_spec->name);
1130 di.v = &temp_case->data[var_spec->fv];
1133 di.format = var_spec->input;
1140 /* Reads a case from the data file and parses it according to
1141 list-format syntax rules. */
1143 read_from_data_list_list (void)
1145 struct dls_var_spec *var_spec;
1149 if (!dfm_get_record (dlsp->handle, NULL))
1152 for (var_spec = dlsp->spec; var_spec; var_spec = var_spec->next)
1154 /* Cut out a field and check for end-of-line. */
1155 int column = cut_field (&field, &len);
1160 msg (SW, _("Missing value(s) for all variables from %s onward. "
1161 "These will be filled with the system-missing value "
1162 "or blanks, as appropriate."),
1164 for (; var_spec; var_spec = var_spec->next)
1165 if (var_spec->width == 0)
1166 temp_case->data[var_spec->fv].f = SYSMIS;
1168 memset (temp_case->data[var_spec->fv].s, ' ', var_spec->width);
1177 di.v = &temp_case->data[var_spec->fv];
1180 di.format = var_spec->input;
1185 dfm_fwd_record (dlsp->handle);
1189 /* Destroys SPEC. */
1191 destroy_dls_var_spec (struct dls_var_spec *spec)
1193 struct dls_var_spec *next;
1195 while (spec != NULL)
1203 /* Destroys DATA LIST transformation PGM. */
1205 destroy_dls (struct trns_header *pgm)
1207 struct data_list_pgm *dls = (struct data_list_pgm *) pgm;
1208 destroy_dls_var_spec (dls->spec);
1209 fh_close_handle (dls->handle);
1212 /* Note that since this is exclusively an input program, C is
1213 guaranteed to be temp_case. */
1215 read_one_case (struct trns_header *t, struct ccase *c UNUSED)
1217 dlsp = (struct data_list_pgm *) t;
1218 return do_reading (1, NULL, NULL);
1221 /* Reads all the records from the data file and passes them to
1224 data_list_source_read (write_case_func *write_case, write_case_data wc_data)
1227 do_reading (0, write_case, wc_data);
1230 /* Destroys the source's internal data. */
1232 data_list_source_destroy_source (void)
1234 destroy_dls (&dls.h);
1237 struct case_stream data_list_source =
1240 data_list_source_read,
1243 data_list_source_destroy_source,
1248 /* REPEATING DATA. */
1250 /* Represents a number or a variable. */
1251 struct rpd_num_or_var
1253 int num; /* Value, or 0. */
1254 struct variable *var; /* Variable, if number==0. */
1257 /* REPEATING DATA private data structure. */
1258 struct repeating_data_trns
1260 struct trns_header h;
1261 struct dls_var_spec *spec; /* Variable parsing specifications. */
1262 struct file_handle *handle; /* Input file, never NULL. */
1264 struct rpd_num_or_var starts_beg; /* STARTS=, before the dash. */
1265 struct rpd_num_or_var starts_end; /* STARTS=, after the dash. */
1266 struct rpd_num_or_var occurs; /* OCCURS= subcommand. */
1267 struct rpd_num_or_var length; /* LENGTH= subcommand. */
1268 struct rpd_num_or_var cont_beg; /* CONTINUED=, before the dash. */
1269 struct rpd_num_or_var cont_end; /* CONTINUED=, after the dash. */
1271 /* ID subcommand. */
1272 int id_beg, id_end; /* Beginning & end columns. */
1273 struct variable *id_var; /* DATA LIST variable. */
1274 struct fmt_spec id_spec; /* Input format spec. */
1275 union value *id_value; /* ID value. */
1277 write_case_func *write_case;
1278 write_case_data wc_data;
1281 /* Information about the transformation being parsed. */
1282 static struct repeating_data_trns rpd;
1284 int repeating_data_trns_proc (struct trns_header *, struct ccase *);
1285 void repeating_data_trns_free (struct trns_header *);
1286 static int parse_num_or_var (struct rpd_num_or_var *, const char *);
1287 static int parse_repeating_data (void);
1288 static void find_variable_input_spec (struct variable *v,
1289 struct fmt_spec *spec);
1291 /* Parses the REPEATING DATA command. */
1293 cmd_repeating_data (void)
1295 /* 0=print no table, 1=print table. (TABLE subcommand.) */
1298 /* Bits are set when a particular subcommand has been seen. */
1301 lex_match_id ("REPEATING");
1302 lex_match_id ("DATA");
1304 assert (vfm_source == &input_program_source
1305 || vfm_source == &file_type_source);
1307 rpd.handle = default_handle;
1308 rpd.starts_beg.num = 0;
1309 rpd.starts_beg.var = NULL;
1310 rpd.starts_end = rpd.occurs = rpd.length = rpd.cont_beg
1311 = rpd.cont_end = rpd.starts_beg;
1312 rpd.id_beg = rpd.id_end = 0;
1314 rpd.id_value = NULL;
1323 if (lex_match_id ("FILE"))
1326 rpd.handle = fh_parse_file_handle ();
1329 if (rpd.handle != default_handle)
1331 msg (SE, _("REPEATING DATA must use the same file as its "
1332 "corresponding DATA LIST or FILE TYPE."));
1336 else if (lex_match_id ("STARTS"))
1341 msg (SE, _("%s subcommand given multiple times."),"STARTS");
1346 if (!parse_num_or_var (&rpd.starts_beg, "STARTS beginning column"))
1349 lex_negative_to_dash ();
1350 if (lex_match ('-'))
1352 if (!parse_num_or_var (&rpd.starts_end, "STARTS ending column"))
1355 /* Otherwise, rpd.starts_end is left uninitialized.
1356 This is okay. We will initialize it later from the
1357 record length of the file. We can't do this now
1358 because we can't be sure that the user has specified
1359 the file handle yet. */
1362 if (rpd.starts_beg.num != 0 && rpd.starts_end.num != 0
1363 && rpd.starts_beg.num > rpd.starts_end.num)
1365 msg (SE, _("STARTS beginning column (%d) exceeds "
1366 "STARTS ending column (%d)."),
1367 rpd.starts_beg.num, rpd.starts_end.num);
1371 else if (lex_match_id ("OCCURS"))
1376 msg (SE, _("%s subcommand given multiple times."),"OCCURS");
1381 if (!parse_num_or_var (&rpd.occurs, "OCCURS"))
1384 else if (lex_match_id ("LENGTH"))
1389 msg (SE, _("%s subcommand given multiple times."),"LENGTH");
1394 if (!parse_num_or_var (&rpd.length, "LENGTH"))
1397 else if (lex_match_id ("CONTINUED"))
1402 msg (SE, _("%s subcommand given multiple times."),"CONTINUED");
1407 if (!lex_match ('/'))
1409 if (!parse_num_or_var (&rpd.cont_beg, "CONTINUED beginning column"))
1412 lex_negative_to_dash ();
1414 && !parse_num_or_var (&rpd.cont_end,
1415 "CONTINUED ending column"))
1418 if (rpd.cont_beg.num != 0 && rpd.cont_end.num != 0
1419 && rpd.cont_beg.num > rpd.cont_end.num)
1421 msg (SE, _("CONTINUED beginning column (%d) exceeds "
1422 "CONTINUED ending column (%d)."),
1423 rpd.cont_beg.num, rpd.cont_end.num);
1428 rpd.cont_beg.num = 1;
1430 else if (lex_match_id ("ID"))
1435 msg (SE, _("%s subcommand given multiple times."),"ID");
1440 if (!lex_force_int ())
1442 if (lex_integer () < 1)
1444 msg (SE, _("ID beginning column (%ld) must be positive."),
1448 rpd.id_beg = lex_integer ();
1451 lex_negative_to_dash ();
1453 if (lex_match ('-'))
1455 if (!lex_force_int ())
1457 if (lex_integer () < 1)
1459 msg (SE, _("ID ending column (%ld) must be positive."),
1463 if (lex_integer () < rpd.id_end)
1465 msg (SE, _("ID ending column (%ld) cannot be less than "
1466 "ID beginning column (%d)."),
1467 lex_integer (), rpd.id_beg);
1471 rpd.id_end = lex_integer ();
1474 else rpd.id_end = rpd.id_beg;
1476 if (!lex_force_match ('='))
1478 rpd.id_var = parse_variable ();
1479 if (rpd.id_var == NULL)
1482 find_variable_input_spec (rpd.id_var, &rpd.id_spec);
1483 rpd.id_value = xmalloc (sizeof *rpd.id_value * rpd.id_var->nv);
1485 else if (lex_match_id ("TABLE"))
1487 else if (lex_match_id ("NOTABLE"))
1489 else if (lex_match_id ("DATA"))
1497 if (!lex_force_match ('/'))
1501 /* Comes here when DATA specification encountered. */
1502 if ((seen & (1 | 2)) != (1 | 2))
1504 if ((seen & 1) == 0)
1505 msg (SE, _("Missing required specification STARTS."));
1506 if ((seen & 2) == 0)
1507 msg (SE, _("Missing required specification OCCURS."));
1511 /* Enforce ID restriction. */
1512 if ((seen & 16) && !(seen & 8))
1514 msg (SE, _("ID specified without CONTINUED."));
1518 /* Calculate starts_end, cont_end if necessary. */
1519 if (rpd.starts_end.num == 0 && rpd.starts_end.var == NULL)
1520 rpd.starts_end.num = fh_record_width (rpd.handle);
1521 if (rpd.cont_end.num == 0 && rpd.starts_end.var == NULL)
1522 rpd.cont_end.num = fh_record_width (rpd.handle);
1524 /* Calculate length if possible. */
1525 if ((seen & 4) == 0)
1527 struct dls_var_spec *iter;
1529 for (iter = rpd.spec; iter; iter = iter->next)
1531 if (iter->lc > rpd.length.num)
1532 rpd.length.num = iter->lc;
1534 assert (rpd.length.num != 0);
1538 if (!parse_repeating_data ())
1542 dump_fixed_table ();
1545 struct repeating_data_trns *new_trns;
1547 rpd.h.proc = repeating_data_trns_proc;
1548 rpd.h.free = repeating_data_trns_free;
1550 new_trns = xmalloc (sizeof *new_trns);
1551 memcpy (new_trns, &rpd, sizeof *new_trns);
1552 add_transformation ((struct trns_header *) new_trns);
1555 return lex_end_of_command ();
1558 /* Because of the way that DATA LIST is structured, it's not trivial
1559 to determine what input format is associated with a given variable.
1560 This function finds the input format specification for variable V
1561 and puts it in SPEC. */
1563 find_variable_input_spec (struct variable *v, struct fmt_spec *spec)
1567 for (i = 0; i < n_trns; i++)
1569 struct data_list_pgm *pgm = (struct data_list_pgm *) t_trns[i];
1571 if (pgm->h.proc == read_one_case)
1573 struct dls_var_spec *iter;
1575 for (iter = pgm->spec; iter; iter = iter->next)
1578 *spec = iter->input;
1587 /* Parses a number or a variable name from the syntax file and puts
1588 the results in VALUE. Ensures that the number is at least 1; else
1589 emits an error based on MESSAGE. Returns nonzero only if
1592 parse_num_or_var (struct rpd_num_or_var *value, const char *message)
1597 value->var = parse_variable ();
1598 if (value->var == NULL)
1600 if (value->var->type == ALPHA)
1602 msg (SE, _("String variable not allowed here."));
1606 else if (lex_integer_p ())
1608 value->num = lex_integer ();
1612 msg (SE, _("%s (%d) must be at least 1."), message, value->num);
1618 msg (SE, _("Variable or integer expected for %s."), message);
1624 /* Parses data specifications for repeating data groups. Taken from
1625 parse_fixed(). Returns nonzero only if successful. */
1627 parse_repeating_data (void)
1634 while (token != '.')
1636 fx.spec.rec = fx.recno;
1638 if (!parse_DATA_LIST_vars (&fx.name, &fx.nname, PV_NONE))
1643 if (!fixed_parse_compatible ())
1646 else if (token == '(')
1650 if (!fixed_parse_fortran ())
1655 msg (SE, _("SPSS-like or FORTRAN-like format "
1656 "specification expected after variable names."));
1660 for (i = 0; i < fx.nname; i++)
1666 lex_error (_("expecting end of command"));
1673 for (i = 0; i < fx.nname; i++)
1679 /* Obtains the real value for rpd_num_or_var N in case C and returns
1680 it. The valid range is nonnegative numbers, but numbers outside
1681 this range can be returned and should be handled by the caller as
1684 realize_value (struct rpd_num_or_var *n, struct ccase *c)
1689 assert (n->num == 0);
1692 double v = c->data[n->var->fv].f;
1694 if (v == SYSMIS || v <= INT_MIN || v >= INT_MAX)
1703 /* Parses one record of repeated data and outputs corresponding cases.
1704 Repeating data is present in line LINE having length LEN.
1705 Repeating data begins in column BEG and continues through column
1706 END inclusive (1-based columns); occurrences are offset OFS columns
1707 from each other. C is the case that will be filled in; T is the
1708 REPEATING DATA transformation. The record ID will be verified if
1709 COMPARE_ID is nonzero; if it is zero, then the record ID is
1710 initialized to the ID present in the case (assuming that ID
1711 location was specified by the user). Returns number of occurrences
1712 parsed up to the specified maximum of MAX_OCCURS. */
1714 rpd_parse_record (int beg, int end, int ofs, struct ccase *c,
1715 struct repeating_data_trns *t,
1716 char *line, int len, int compare_id, int max_occurs)
1721 /* Handle record ID values. */
1724 union value id_temp[MAX_ELEMS_PER_VALUE];
1726 /* Parse record ID into V. */
1730 data_in_finite_line (&di, line, len, t->id_beg, t->id_end);
1731 di.v = compare_id ? id_temp : t->id_value;
1734 di.format = t->id_spec;
1741 && compare_values (id_temp, t->id_value, t->id_var->width) != 0)
1743 char expected_str [MAX_FORMATTED_LEN + 1];
1744 char actual_str [MAX_FORMATTED_LEN + 1];
1746 data_out (expected_str, &t->id_var->print, t->id_value);
1747 expected_str[t->id_var->print.w] = '\0';
1749 data_out (actual_str, &t->id_var->print, id_temp);
1750 actual_str[t->id_var->print.w] = '\0';
1753 _("Encountered mismatched record ID \"%s\" expecting \"%s\"."),
1754 actual_str, expected_str);
1760 /* Iterate over the set of expected occurrences and record each of
1761 them as a separate case. FIXME: We need to execute any
1762 transformations that follow the current one. */
1766 for (occurrences = 0; occurrences < max_occurs; )
1768 if (cur + ofs > end + 1)
1773 struct dls_var_spec *var_spec = t->spec;
1775 for (; var_spec; var_spec = var_spec->next)
1777 int fc = var_spec->fc - 1 + cur;
1778 int lc = var_spec->lc - 1 + cur;
1780 if (fc > len && !warned && var_spec->input.type != FMT_A)
1785 _("Variable %s starting in column %d extends "
1786 "beyond physical record length of %d."),
1787 var_spec->v->name, fc, len);
1793 data_in_finite_line (&di, line, len, fc, lc);
1794 di.v = &c->data[var_spec->fv];
1797 di.format = var_spec->input;
1807 if (!t->write_case (t->wc_data))
1815 /* Analogous to read_one_case; reads one set of repetitions of the
1816 elements in the REPEATING DATA structure. Returns -1 on success,
1817 -2 on end of file or on failure. */
1819 repeating_data_trns_proc (struct trns_header *trns, struct ccase *c)
1821 dfm_push (dlsp->handle);
1824 struct repeating_data_trns *t = (struct repeating_data_trns *) trns;
1826 char *line; /* Current record. */
1827 int len; /* Length of current record. */
1829 int starts_beg; /* Starting column. */
1830 int starts_end; /* Ending column. */
1831 int occurs; /* Number of repetitions. */
1832 int length; /* Length of each occurrence. */
1833 int cont_beg; /* Starting column for continuation lines. */
1834 int cont_end; /* Ending column for continuation lines. */
1836 int occurs_left; /* Number of occurrences remaining. */
1838 int code; /* Return value from rpd_parse_record(). */
1840 int skip_first_record = 0;
1842 /* Read the current record. */
1843 dfm_bkwd_record (dlsp->handle, 1);
1844 line = dfm_get_record (dlsp->handle, &len);
1847 dfm_fwd_record (dlsp->handle);
1849 /* Calculate occurs, length. */
1850 occurs_left = occurs = realize_value (&t->occurs, c);
1853 tmsg (SE, RPD_ERR, _("Invalid value %d for OCCURS."), occurs);
1856 starts_beg = realize_value (&t->starts_beg, c);
1857 if (starts_beg <= 0)
1859 tmsg (SE, RPD_ERR, _("Beginning column for STARTS (%d) must be "
1864 starts_end = realize_value (&t->starts_end, c);
1865 if (starts_end < starts_beg)
1867 tmsg (SE, RPD_ERR, _("Ending column for STARTS (%d) is less than "
1868 "beginning column (%d)."),
1869 starts_end, starts_beg);
1870 skip_first_record = 1;
1872 length = realize_value (&t->length, c);
1875 tmsg (SE, RPD_ERR, _("Invalid value %d for LENGTH."), length);
1877 occurs = occurs_left = 1;
1879 cont_beg = realize_value (&t->cont_beg, c);
1882 tmsg (SE, RPD_ERR, _("Beginning column for CONTINUED (%d) must be "
1887 cont_end = realize_value (&t->cont_end, c);
1888 if (cont_end < cont_beg)
1890 tmsg (SE, RPD_ERR, _("Ending column for CONTINUED (%d) is less than "
1891 "beginning column (%d)."),
1892 cont_end, cont_beg);
1896 /* Parse the first record. */
1897 if (!skip_first_record)
1899 code = rpd_parse_record (starts_beg, starts_end, length, c, t, line,
1900 len, 0, occurs_left);
1904 else if (cont_beg == 0)
1907 /* Make sure, if some occurrences are left, that we have
1908 continuation records. */
1909 occurs_left -= code;
1910 if (occurs_left != 0 && cont_beg == 0)
1913 _("Number of repetitions specified on OCCURS (%d) "
1914 "exceed number of repetitions available in "
1915 "space on STARTS (%d), and CONTINUED not specified."),
1920 /* Go on to additional records. */
1921 while (occurs_left != 0)
1923 assert (occurs_left >= 0);
1925 /* Read in another record. */
1926 line = dfm_get_record (dlsp->handle, &len);
1930 _("Unexpected end of file with %d repetitions "
1931 "remaining out of %d."),
1932 occurs_left, occurs);
1935 dfm_fwd_record (dlsp->handle);
1937 /* Parse this record. */
1938 code = rpd_parse_record (cont_beg, cont_end, length, c, t, line,
1939 len, 1, occurs_left);
1942 occurs_left -= code;
1946 dfm_pop (dlsp->handle);
1948 /* FIXME: This is a kluge until we've implemented multiplexing of
1954 repeating_data_trns_free (struct trns_header *rpd_)
1956 struct repeating_data_trns *rpd = (struct repeating_data_trns *) rpd_;
1958 destroy_dls_var_spec (rpd->spec);
1959 fh_close_handle (rpd->handle);
1960 free (rpd->id_value);
1963 /* This is a kluge. It is only here until I have more time
1964 tocome up with something better. It lets
1965 repeating_data_trns_proc() know how to write the cases that it
1968 repeating_data_set_write_case (struct trns_header *trns,
1969 write_case_func *write_case,
1970 write_case_data wc_data)
1972 struct repeating_data_trns *t = (struct repeating_data_trns *) trns;
1974 assert (trns->proc == repeating_data_trns_proc);
1975 t->write_case = write_case;
1976 t->wc_data = wc_data;