1 /* PSPP - computes sample statistics.
2 Copyright (C) 1997-9, 2000 Free Software Foundation, Inc.
3 Written by Ben Pfaff <blp@gnu.org>.
5 This program is free software; you can redistribute it and/or
6 modify it under the terms of the GNU General Public License as
7 published by the Free Software Foundation; either version 2 of the
8 License, or (at your option) any later version.
10 This program is distributed in the hope that it will be useful, but
11 WITHOUT ANY WARRANTY; without even the implied warranty of
12 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
13 General Public License for more details.
15 You should have received a copy of the GNU General Public License
16 along with this program; if not, write to the Free Software
17 Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA
21 #include "data-list.h"
30 #include "debug-print.h"
33 #include "file-handle.h"
43 /* Utility function. */
45 /* FIXME: Either REPEATING DATA must be the last transformation, or we
46 must multiplex the transformations that follow (i.e., perform them
47 for every case that we produce from a repetition instance).
48 Currently we do neither. We should do one or the other. */
50 /* Describes how to parse one variable. */
53 struct dls_var_spec *next; /* Next specification in list. */
55 /* Both free and fixed formats. */
56 struct fmt_spec input; /* Input format of this field. */
57 struct variable *v; /* Associated variable. Used only in
58 parsing. Not safe later. */
59 int fv; /* First value in case. */
61 /* Fixed format only. */
62 int rec; /* Record number (1-based). */
63 int fc, lc; /* Column numbers in record. */
65 /* Free format only. */
66 char name[9]; /* Name of variable. */
69 /* Constants for DATA LIST type. */
70 /* Must match table in cmd_data_list(). */
78 /* DATA LIST private data structure. */
83 struct dls_var_spec *first, *last; /* Variable parsing specifications. */
84 struct file_handle *handle; /* Input file, never NULL. */
86 int type; /* A DLS_* constant. */
87 struct variable *end; /* Variable specified on END subcommand. */
88 int eof; /* End of file encountered. */
89 int nrec; /* Number of records. */
90 size_t case_size; /* Case size in bytes. */
93 static int parse_fixed (struct data_list_pgm *);
94 static int parse_free (struct dls_var_spec **, struct dls_var_spec **);
95 static void dump_fixed_table (const struct dls_var_spec *specs,
96 const struct file_handle *handle, int nrec);
97 static void dump_free_table (const struct data_list_pgm *);
98 static void destroy_dls_var_spec (struct dls_var_spec *);
99 static trns_free_func data_list_trns_free;
100 static trns_proc_func data_list_trns_proc;
102 /* Message title for REPEATING DATA. */
103 #define RPD_ERR "REPEATING DATA: "
108 /* DATA LIST program under construction. */
109 struct data_list_pgm *dls;
111 /* 0=print no table, 1=print table. (TABLE subcommand.) */
114 if (!case_source_is_complex (vfm_source))
115 discard_variables ();
117 dls = xmalloc (sizeof *dls);
118 dls->handle = default_handle;
123 dls->first = dls->last = NULL;
127 if (lex_match_id ("FILE"))
130 dls->handle = fh_parse_file_handle ();
133 if (case_source_is_class (vfm_source, &file_type_source_class)
134 && dls->handle != default_handle)
136 msg (SE, _("DATA LIST may not use a different file from "
137 "that specified on its surrounding FILE TYPE."));
141 else if (lex_match_id ("RECORDS"))
145 if (!lex_force_int ())
147 dls->nrec = lex_integer ();
151 else if (lex_match_id ("END"))
155 msg (SE, _("The END subcommand may only be specified once."));
160 if (!lex_force_id ())
162 dls->end = dict_lookup_var (default_dict, tokid);
164 dls->end = dict_create_var_assert (default_dict, tokid, 0);
167 else if (token == T_ID)
169 /* Must match DLS_* constants. */
170 static const char *id[] = {"FIXED", "FREE", "LIST", "NOTABLE",
175 for (p = id; *p; p++)
176 if (lex_id_match (*p, tokid))
191 msg (SE, _("Only one of FIXED, FREE, or LIST may "
208 dls->case_size = dict_get_case_size (default_dict);
209 default_handle = dls->handle;
212 dls->type = DLS_FIXED;
216 if (dls->type == DLS_FREE)
222 if (dls->type == DLS_FIXED)
224 if (!parse_fixed (dls))
227 dump_fixed_table (dls->first, dls->handle, dls->nrec);
231 if (!parse_free (&dls->first, &dls->last))
234 dump_free_table (dls);
237 if (!dfm_open_for_reading (dls->handle))
240 if (vfm_source != NULL)
242 struct data_list_pgm *new_pgm;
244 dls->h.proc = data_list_trns_proc;
245 dls->h.free = data_list_trns_free;
247 new_pgm = xmalloc (sizeof *new_pgm);
248 memcpy (new_pgm, &dls, sizeof *new_pgm);
249 add_transformation (&new_pgm->h);
252 vfm_source = create_case_source (&data_list_source_class,
258 destroy_dls_var_spec (dls->first);
263 /* Adds SPEC to the linked list with head at FIRST and tail at
266 append_var_spec (struct dls_var_spec **first, struct dls_var_spec **last,
267 struct dls_var_spec *spec)
274 (*last)->next = spec;
278 /* Fixed-format parsing. */
280 /* Used for chaining together fortran-like format specifiers. */
283 struct fmt_list *next;
286 struct fmt_list *down;
289 /* State of parsing DATA LIST. */
290 struct fixed_parsing_state
292 char **name; /* Variable names. */
293 int name_cnt; /* Number of names. */
295 int recno; /* Index of current record. */
296 int sc; /* 1-based column number of starting column for
297 next field to output. */
300 static int fixed_parse_compatible (struct fixed_parsing_state *,
301 struct dls_var_spec **,
302 struct dls_var_spec **);
303 static int fixed_parse_fortran (struct fixed_parsing_state *,
304 struct dls_var_spec **,
305 struct dls_var_spec **);
307 /* Parses all the variable specifications for DATA LIST FIXED,
308 storing them into DLS. Returns nonzero if successful. */
310 parse_fixed (struct data_list_pgm *dls)
312 struct fixed_parsing_state fx;
320 while (lex_match ('/'))
323 if (lex_integer_p ())
325 if (lex_integer () < fx.recno)
327 msg (SE, _("The record number specified, %ld, is "
328 "before the previous record, %d. Data "
329 "fields must be listed in order of "
330 "increasing record number."),
331 lex_integer (), fx.recno - 1);
335 fx.recno = lex_integer ();
341 if (!parse_DATA_LIST_vars (&fx.name, &fx.name_cnt, PV_NONE))
346 if (!fixed_parse_compatible (&fx, &dls->first, &dls->last))
349 else if (token == '(')
351 if (!fixed_parse_fortran (&fx, &dls->first, &dls->last))
356 msg (SE, _("SPSS-like or FORTRAN-like format "
357 "specification expected after variable names."));
361 for (i = 0; i < fx.name_cnt; i++)
365 if (dls->first == NULL)
367 msg (SE, _("At least one variable must be specified."));
370 if (dls->nrec && dls->last->rec > dls->nrec)
372 msg (SE, _("Variables are specified on records that "
373 "should not exist according to RECORDS subcommand."));
377 dls->nrec = dls->last->rec;
380 lex_error (_("expecting end of command"));
386 for (i = 0; i < fx.name_cnt; i++)
392 /* Parses a variable specification in the form 1-10 (A) based on
393 FX and adds specifications to the linked list with head at
394 FIRST and tail at LAST. */
396 fixed_parse_compatible (struct fixed_parsing_state *fx,
397 struct dls_var_spec **first, struct dls_var_spec **last)
399 struct fmt_spec input;
405 if (!lex_force_int ())
410 msg (SE, _("Column positions for fields must be positive."));
416 lex_negative_to_dash ();
419 if (!lex_force_int ())
424 msg (SE, _("Column positions for fields must be positive."));
429 msg (SE, _("The ending column for a field must be "
430 "greater than the starting column."));
439 /* Divide columns evenly. */
440 input.w = (lc - fc + 1) / fx->name_cnt;
441 if ((lc - fc + 1) % fx->name_cnt)
443 msg (SE, _("The %d columns %d-%d "
444 "can't be evenly divided into %d fields."),
445 lc - fc + 1, fc, lc, fx->name_cnt);
449 /* Format specifier. */
452 struct fmt_desc *fdp;
458 input.type = parse_format_specifier_name (&cp, 0);
459 if (input.type == -1)
463 msg (SE, _("A format specifier on this line "
464 "has extra characters on the end."));
474 if (lex_integer_p ())
476 if (lex_integer () < 1)
478 msg (SE, _("The value for number of decimal places "
479 "must be at least 1."));
483 input.d = lex_integer ();
489 fdp = &formats[input.type];
490 if (fdp->n_args < 2 && input.d)
492 msg (SE, _("Input format %s doesn't accept decimal places."),
500 if (!lex_force_match (')'))
508 if (!check_input_specifier (&input))
511 /* Start column for next specification. */
514 /* Width of variables to create. */
515 if (input.type == FMT_A || input.type == FMT_AHEX)
520 /* Create variables and var specs. */
521 for (i = 0; i < fx->name_cnt; i++)
523 struct dls_var_spec *spec;
526 v = dict_create_var (default_dict, fx->name[i], width);
529 convert_fmt_ItoO (&input, &v->print);
531 if (!case_source_is_complex (vfm_source))
536 v = dict_lookup_var_assert (default_dict, fx->name[i]);
537 if (vfm_source == NULL)
539 msg (SE, _("%s is a duplicate variable name."), fx->name[i]);
542 if ((width != 0) != (v->width != 0))
544 msg (SE, _("There is already a variable %s of a "
549 if (width != 0 && width != v->width)
551 msg (SE, _("There is already a string variable %s of a "
552 "different width."), fx->name[i]);
557 spec = xmalloc (sizeof *spec);
561 spec->rec = fx->recno;
562 spec->fc = fc + input.w * i;
563 spec->lc = spec->fc + input.w - 1;
564 append_var_spec (first, last, spec);
569 /* Destroy format list F and, if RECURSE is nonzero, all its
572 destroy_fmt_list (struct fmt_list *f, int recurse)
574 struct fmt_list *next;
579 if (recurse && f->f.type == FMT_DESCEND)
580 destroy_fmt_list (f->down, 1);
585 /* Takes a hierarchically structured fmt_list F as constructed by
586 fixed_parse_fortran(), and flattens it, adding the variable
587 specifications to the linked list with head FIRST and tail
588 LAST. NAME_IDX is used to take values from the list of names
589 in FX; it should initially point to a value of 0. */
591 dump_fmt_list (struct fixed_parsing_state *fx, struct fmt_list *f,
592 struct dls_var_spec **first, struct dls_var_spec **last,
597 for (; f; f = f->next)
598 if (f->f.type == FMT_X)
600 else if (f->f.type == FMT_T)
602 else if (f->f.type == FMT_NEWREC)
604 fx->recno += f->count;
608 for (i = 0; i < f->count; i++)
609 if (f->f.type == FMT_DESCEND)
611 if (!dump_fmt_list (fx, f->down, first, last, name_idx))
616 struct dls_var_spec *spec;
620 if (formats[f->f.type].cat & FCAT_STRING)
624 if (*name_idx >= fx->name_cnt)
626 msg (SE, _("The number of format "
627 "specifications exceeds the given number of "
632 v = dict_create_var (default_dict, fx->name[(*name_idx)++], width);
635 msg (SE, _("%s is a duplicate variable name."), fx->name[i]);
639 if (!case_source_is_complex (vfm_source))
642 spec = xmalloc (sizeof *spec);
646 spec->rec = fx->recno;
648 spec->lc = fx->sc + f->f.w - 1;
649 append_var_spec (first, last, spec);
651 convert_fmt_ItoO (&spec->input, &v->print);
659 /* Recursively parses a FORTRAN-like format specification into
660 the linked list with head FIRST and tail TAIL. LEVEL is the
661 level of recursion, starting from 0. Returns the parsed
662 specification if successful, or a null pointer on failure. */
663 static struct fmt_list *
664 fixed_parse_fortran_internal (struct fixed_parsing_state *fx,
665 struct dls_var_spec **first,
666 struct dls_var_spec **last)
668 struct fmt_list *head = NULL;
669 struct fmt_list *tail = NULL;
671 lex_force_match ('(');
675 struct fmt_list *new = xmalloc (sizeof *new);
678 /* Append new to list. */
686 if (lex_integer_p ())
688 new->count = lex_integer ();
694 /* Parse format specifier. */
697 new->f.type = FMT_DESCEND;
698 new->down = fixed_parse_fortran_internal (fx, first, last);
699 if (new->down == NULL)
702 else if (lex_match ('/'))
703 new->f.type = FMT_NEWREC;
704 else if (!parse_format_specifier (&new->f, 1)
705 || !check_input_specifier (&new->f))
710 lex_force_match (')');
715 destroy_fmt_list (head, 0);
720 /* Parses a FORTRAN-like format specification into the linked
721 list with head FIRST and tail LAST. Returns nonzero if
724 fixed_parse_fortran (struct fixed_parsing_state *fx,
725 struct dls_var_spec **first, struct dls_var_spec **last)
727 struct fmt_list *list;
730 list = fixed_parse_fortran_internal (fx, first, last);
735 dump_fmt_list (fx, list, first, last, &name_idx);
736 destroy_fmt_list (list, 1);
737 if (name_idx < fx->name_cnt)
739 msg (SE, _("There aren't enough format specifications "
740 "to match the number of variable names given."));
747 /* Displays a table giving information on fixed-format variable
748 parsing on DATA LIST. */
749 /* FIXME: The `Columns' column should be divided into three columns,
750 one for the starting column, one for the dash, one for the ending
751 column; then right-justify the starting column and left-justify the
754 dump_fixed_table (const struct dls_var_spec *specs,
755 const struct file_handle *handle, int nrec)
757 const struct dls_var_spec *spec;
760 const char *filename;
763 for (i = 0, spec = specs; spec; spec = spec->next)
765 t = tab_create (4, i + 1, 0);
766 tab_columns (t, TAB_COL_DOWN, 1);
767 tab_headers (t, 0, 0, 1, 0);
768 tab_text (t, 0, 0, TAB_CENTER | TAT_TITLE, _("Variable"));
769 tab_text (t, 1, 0, TAB_CENTER | TAT_TITLE, _("Record"));
770 tab_text (t, 2, 0, TAB_CENTER | TAT_TITLE, _("Columns"));
771 tab_text (t, 3, 0, TAB_CENTER | TAT_TITLE, _("Format"));
772 tab_box (t, TAL_1, TAL_1, TAL_0, TAL_1, 0, 0, 3, i);
773 tab_hline (t, TAL_2, 0, 3, 1);
774 tab_dim (t, tab_natural_dimensions);
776 for (i = 1, spec = specs; spec; spec = spec->next, i++)
778 tab_text (t, 0, i, TAB_LEFT, spec->v->name);
779 tab_text (t, 1, i, TAT_PRINTF, "%d", spec->rec);
780 tab_text (t, 2, i, TAT_PRINTF, "%3d-%3d",
782 tab_text (t, 3, i, TAB_LEFT | TAT_FIX,
783 fmt_to_string (&spec->input));
786 filename = handle_get_filename (handle);
787 if (filename == NULL)
789 buf = local_alloc (strlen (filename) + INT_DIGITS + 80);
790 sprintf (buf, (handle != inline_file
791 ? ngettext ("Reading %d record from file %s.",
792 "Reading %d records from file %s.", nrec)
793 : ngettext ("Reading %d record from the command file.",
794 "Reading %d records from the command file.",
798 tab_title (t, 0, buf);
803 /* Free-format parsing. */
805 /* Parses variable specifications for DATA LIST FREE and adds
806 them to the linked list with head FIRST and tail LAST.
807 Returns nonzero only if successful. */
809 parse_free (struct dls_var_spec **first, struct dls_var_spec **last)
814 struct fmt_spec input, output;
820 if (!parse_DATA_LIST_vars (&name, &name_cnt, PV_NONE))
824 if (!parse_format_specifier (&input, 0)
825 || !check_input_specifier (&input)
826 || !lex_force_match (')'))
828 for (i = 0; i < name_cnt; i++)
833 convert_fmt_ItoO (&input, &output);
841 output = get_format();
844 if (input.type == FMT_A || input.type == FMT_AHEX)
848 for (i = 0; i < name_cnt; i++)
850 struct dls_var_spec *spec;
853 v = dict_create_var (default_dict, name[i], width);
856 msg (SE, _("%s is a duplicate variable name."), name[i]);
859 v->print = v->write = output;
861 if (!case_source_is_complex (vfm_source))
864 spec = xmalloc (sizeof *spec);
868 strcpy (spec->name, name[i]);
869 append_var_spec (first, last, spec);
871 for (i = 0; i < name_cnt; i++)
877 lex_error (_("expecting end of command"));
881 /* Displays a table giving information on free-format variable parsing
884 dump_free_table (const struct data_list_pgm *dls)
890 struct dls_var_spec *spec;
891 for (i = 0, spec = dls->first; spec; spec = spec->next)
895 t = tab_create (2, i + 1, 0);
896 tab_columns (t, TAB_COL_DOWN, 1);
897 tab_headers (t, 0, 0, 1, 0);
898 tab_text (t, 0, 0, TAB_CENTER | TAT_TITLE, _("Variable"));
899 tab_text (t, 1, 0, TAB_CENTER | TAT_TITLE, _("Format"));
900 tab_box (t, TAL_1, TAL_1, TAL_0, TAL_1, 0, 0, 1, i);
901 tab_hline (t, TAL_2, 0, 1, 1);
902 tab_dim (t, tab_natural_dimensions);
905 struct dls_var_spec *spec;
907 for (i = 1, spec = dls->first; spec; spec = spec->next, i++)
909 tab_text (t, 0, i, TAB_LEFT, spec->v->name);
910 tab_text (t, 1, i, TAB_LEFT | TAT_FIX, fmt_to_string (&spec->input));
915 const char *filename;
917 filename = handle_get_filename (dls->handle);
918 if (filename == NULL)
921 (dls->handle != inline_file
922 ? _("Reading free-form data from file %s.")
923 : _("Reading free-form data from the command file.")),
930 /* Input procedure. */
932 /* Extracts a field from the current position in the current record.
933 Fields can be unquoted or quoted with single- or double-quote
934 characters. *RET_LEN is set to the field length, *RET_CP is set to
935 the field itself. After parsing the field, sets the current
936 position in the record to just past the field. Returns 0 on
937 failure or a 1-based column number indicating the beginning of the
940 cut_field (const struct data_list_pgm *dls, char **ret_cp, int *ret_len)
945 cp = dfm_get_record (dls->handle, &len);
951 /* Skip leading whitespace and commas. */
952 while ((isspace ((unsigned char) *cp) || *cp == ',') && cp < ep)
957 /* Three types of fields: quoted with ', quoted with ", unquoted. */
958 if (*cp == '\'' || *cp == '"')
963 while (cp < ep && *cp != quote)
965 *ret_len = cp - *ret_cp;
969 msg (SW, _("Scope of string exceeds line."));
974 while (cp < ep && !isspace ((unsigned char) *cp) && *cp != ',')
976 *ret_len = cp - *ret_cp;
980 int beginning_column;
982 dfm_set_record (dls->handle, *ret_cp);
983 beginning_column = dfm_get_cur_col (dls->handle) + 1;
985 dfm_set_record (dls->handle, cp);
987 return beginning_column;
991 typedef int data_list_read_func (const struct data_list_pgm *, struct ccase *);
992 static data_list_read_func read_from_data_list_fixed;
993 static data_list_read_func read_from_data_list_free;
994 static data_list_read_func read_from_data_list_list;
996 /* Returns the proper function to read the kind of DATA LIST
997 data specified by DLS. */
998 static data_list_read_func *
999 get_data_list_read_func (const struct data_list_pgm *dls)
1004 return read_from_data_list_fixed;
1007 return read_from_data_list_free;
1010 return read_from_data_list_list;
1017 /* Reads a case from the data file into C, parsing it according
1018 to fixed-format syntax rules in DLS. Returns -1 on success,
1019 -2 at end of file. */
1021 read_from_data_list_fixed (const struct data_list_pgm *dls,
1024 struct dls_var_spec *var_spec = dls->first;
1027 if (!dfm_get_record (dls->handle, NULL))
1029 for (i = 1; i <= dls->nrec; i++)
1032 char *line = dfm_get_record (dls->handle, &len);
1036 /* Note that this can't occur on the first record. */
1037 msg (SW, _("Partial case of %d of %d records discarded."),
1042 for (; var_spec && i == var_spec->rec; var_spec = var_spec->next)
1046 data_in_finite_line (&di, line, len, var_spec->fc, var_spec->lc);
1047 di.v = &c->data[var_spec->fv];
1049 di.f1 = var_spec->fc;
1050 di.format = var_spec->input;
1055 dfm_fwd_record (dls->handle);
1061 /* Reads a case from the data file into C, parsing it according
1062 to free-format syntax rules in DLS. Returns -1 on success,
1063 -2 at end of file. */
1065 read_from_data_list_free (const struct data_list_pgm *dls,
1068 struct dls_var_spec *var_spec;
1072 for (var_spec = dls->first; var_spec; var_spec = var_spec->next)
1076 /* Cut out a field and read in a new record if necessary. */
1079 column = cut_field (dls, &field, &len);
1083 if (dfm_get_record (dls->handle, NULL))
1084 dfm_fwd_record (dls->handle);
1085 if (!dfm_get_record (dls->handle, NULL))
1087 if (var_spec != dls->first)
1088 msg (SW, _("Partial case discarded. The first variable "
1089 "missing was %s."), var_spec->name);
1099 di.v = &c->data[var_spec->fv];
1102 di.format = var_spec->input;
1109 /* Reads a case from the data file and parses it according to
1110 list-format syntax rules. Returns -1 on success, -2 at end of
1113 read_from_data_list_list (const struct data_list_pgm *dls,
1116 struct dls_var_spec *var_spec;
1120 if (!dfm_get_record (dls->handle, NULL))
1123 for (var_spec = dls->first; var_spec; var_spec = var_spec->next)
1125 /* Cut out a field and check for end-of-line. */
1126 int column = cut_field (dls, &field, &len);
1130 if (get_undefined() )
1131 msg (SW, _("Missing value(s) for all variables from %s onward. "
1132 "These will be filled with the system-missing value "
1133 "or blanks, as appropriate."),
1135 for (; var_spec; var_spec = var_spec->next)
1137 int width = get_format_var_width (&var_spec->input);
1139 c->data[var_spec->fv].f = SYSMIS;
1141 memset (c->data[var_spec->fv].s, ' ', width);
1151 di.v = &c->data[var_spec->fv];
1154 di.format = var_spec->input;
1159 dfm_fwd_record (dls->handle);
1163 /* Destroys SPEC. */
1165 destroy_dls_var_spec (struct dls_var_spec *spec)
1167 struct dls_var_spec *next;
1169 while (spec != NULL)
1177 /* Destroys DATA LIST transformation PGM. */
1179 data_list_trns_free (struct trns_header *pgm)
1181 struct data_list_pgm *dls = (struct data_list_pgm *) pgm;
1182 destroy_dls_var_spec (dls->first);
1183 fh_close_handle (dls->handle);
1187 /* Handle DATA LIST transformation T, parsing data into C. */
1189 data_list_trns_proc (struct trns_header *t, struct ccase *c,
1190 int case_num UNUSED)
1192 struct data_list_pgm *dls = (struct data_list_pgm *) t;
1193 data_list_read_func *read_func;
1196 dfm_push (dls->handle);
1198 read_func = get_data_list_read_func (dls);
1199 retval = read_func (dls, c);
1201 /* Handle end of file. */
1204 /* If we already encountered end of file then this is an
1208 msg (SE, _("Attempt to read past end of file."));
1210 dfm_pop (dls->handle);
1214 /* Otherwise simply note it. */
1220 /* If there was an END subcommand handle it. */
1221 if (dls->end != NULL)
1225 c->data[dls->end->fv].f = 1.0;
1229 c->data[dls->end->fv].f = 0.0;
1232 dfm_pop (dls->handle);
1237 /* Reads all the records from the data file and passes them to
1240 data_list_source_read (struct case_source *source,
1242 write_case_func *write_case, write_case_data wc_data)
1244 struct data_list_pgm *dls = source->aux;
1245 data_list_read_func *read_func = get_data_list_read_func (dls);
1247 dfm_push (dls->handle);
1248 while (read_func (dls, c) != -2)
1249 if (!write_case (wc_data))
1251 dfm_pop (dls->handle);
1253 fh_close_handle (dls->handle);
1256 /* Destroys the source's internal data. */
1258 data_list_source_destroy (struct case_source *source)
1260 data_list_trns_free (source->aux);
1263 const struct case_source_class data_list_source_class =
1267 data_list_source_read,
1268 data_list_source_destroy,
1271 /* REPEATING DATA. */
1273 /* Represents a number or a variable. */
1274 struct rpd_num_or_var
1276 int num; /* Value, or 0. */
1277 struct variable *var; /* Variable, if number==0. */
1280 /* REPEATING DATA private data structure. */
1281 struct repeating_data_trns
1283 struct trns_header h;
1284 struct dls_var_spec *first, *last; /* Variable parsing specifications. */
1285 struct file_handle *handle; /* Input file, never NULL. */
1287 struct rpd_num_or_var starts_beg; /* STARTS=, before the dash. */
1288 struct rpd_num_or_var starts_end; /* STARTS=, after the dash. */
1289 struct rpd_num_or_var occurs; /* OCCURS= subcommand. */
1290 struct rpd_num_or_var length; /* LENGTH= subcommand. */
1291 struct rpd_num_or_var cont_beg; /* CONTINUED=, before the dash. */
1292 struct rpd_num_or_var cont_end; /* CONTINUED=, after the dash. */
1294 /* ID subcommand. */
1295 int id_beg, id_end; /* Beginning & end columns. */
1296 struct variable *id_var; /* DATA LIST variable. */
1297 struct fmt_spec id_spec; /* Input format spec. */
1298 union value *id_value; /* ID value. */
1300 write_case_func *write_case;
1301 write_case_data wc_data;
1304 static trns_free_func repeating_data_trns_free;
1305 static int parse_num_or_var (struct rpd_num_or_var *, const char *);
1306 static int parse_repeating_data (struct dls_var_spec **,
1307 struct dls_var_spec **);
1308 static void find_variable_input_spec (struct variable *v,
1309 struct fmt_spec *spec);
1311 /* Parses the REPEATING DATA command. */
1313 cmd_repeating_data (void)
1315 struct repeating_data_trns *rpd;
1317 /* 0=print no table, 1=print table. (TABLE subcommand.) */
1320 /* Bits are set when a particular subcommand has been seen. */
1323 assert (case_source_is_complex (vfm_source));
1325 rpd = xmalloc (sizeof *rpd);
1326 rpd->handle = default_handle;
1327 rpd->first = rpd->last = NULL;
1328 rpd->starts_beg.num = 0;
1329 rpd->starts_beg.var = NULL;
1330 rpd->starts_end = rpd->occurs = rpd->length = rpd->cont_beg
1331 = rpd->cont_end = rpd->starts_beg;
1332 rpd->id_beg = rpd->id_end = 0;
1334 rpd->id_value = NULL;
1340 if (lex_match_id ("FILE"))
1343 rpd->handle = fh_parse_file_handle ();
1346 if (rpd->handle != default_handle)
1348 msg (SE, _("REPEATING DATA must use the same file as its "
1349 "corresponding DATA LIST or FILE TYPE."));
1353 else if (lex_match_id ("STARTS"))
1358 msg (SE, _("%s subcommand given multiple times."),"STARTS");
1363 if (!parse_num_or_var (&rpd->starts_beg, "STARTS beginning column"))
1366 lex_negative_to_dash ();
1367 if (lex_match ('-'))
1369 if (!parse_num_or_var (&rpd->starts_end, "STARTS ending column"))
1372 /* Otherwise, rpd->starts_end is left uninitialized.
1373 This is okay. We will initialize it later from the
1374 record length of the file. We can't do this now
1375 because we can't be sure that the user has specified
1376 the file handle yet. */
1379 if (rpd->starts_beg.num != 0 && rpd->starts_end.num != 0
1380 && rpd->starts_beg.num > rpd->starts_end.num)
1382 msg (SE, _("STARTS beginning column (%d) exceeds "
1383 "STARTS ending column (%d)."),
1384 rpd->starts_beg.num, rpd->starts_end.num);
1388 else if (lex_match_id ("OCCURS"))
1393 msg (SE, _("%s subcommand given multiple times."),"OCCURS");
1398 if (!parse_num_or_var (&rpd->occurs, "OCCURS"))
1401 else if (lex_match_id ("LENGTH"))
1406 msg (SE, _("%s subcommand given multiple times."),"LENGTH");
1411 if (!parse_num_or_var (&rpd->length, "LENGTH"))
1414 else if (lex_match_id ("CONTINUED"))
1419 msg (SE, _("%s subcommand given multiple times."),"CONTINUED");
1424 if (!lex_match ('/'))
1426 if (!parse_num_or_var (&rpd->cont_beg, "CONTINUED beginning column"))
1429 lex_negative_to_dash ();
1431 && !parse_num_or_var (&rpd->cont_end,
1432 "CONTINUED ending column"))
1435 if (rpd->cont_beg.num != 0 && rpd->cont_end.num != 0
1436 && rpd->cont_beg.num > rpd->cont_end.num)
1438 msg (SE, _("CONTINUED beginning column (%d) exceeds "
1439 "CONTINUED ending column (%d)."),
1440 rpd->cont_beg.num, rpd->cont_end.num);
1445 rpd->cont_beg.num = 1;
1447 else if (lex_match_id ("ID"))
1452 msg (SE, _("%s subcommand given multiple times."),"ID");
1457 if (!lex_force_int ())
1459 if (lex_integer () < 1)
1461 msg (SE, _("ID beginning column (%ld) must be positive."),
1465 rpd->id_beg = lex_integer ();
1468 lex_negative_to_dash ();
1470 if (lex_match ('-'))
1472 if (!lex_force_int ())
1474 if (lex_integer () < 1)
1476 msg (SE, _("ID ending column (%ld) must be positive."),
1480 if (lex_integer () < rpd->id_end)
1482 msg (SE, _("ID ending column (%ld) cannot be less than "
1483 "ID beginning column (%d)."),
1484 lex_integer (), rpd->id_beg);
1488 rpd->id_end = lex_integer ();
1491 else rpd->id_end = rpd->id_beg;
1493 if (!lex_force_match ('='))
1495 rpd->id_var = parse_variable ();
1496 if (rpd->id_var == NULL)
1499 find_variable_input_spec (rpd->id_var, &rpd->id_spec);
1500 rpd->id_value = xmalloc (sizeof *rpd->id_value * rpd->id_var->nv);
1502 else if (lex_match_id ("TABLE"))
1504 else if (lex_match_id ("NOTABLE"))
1506 else if (lex_match_id ("DATA"))
1514 if (!lex_force_match ('/'))
1518 /* Comes here when DATA specification encountered. */
1519 if ((seen & (1 | 2)) != (1 | 2))
1521 if ((seen & 1) == 0)
1522 msg (SE, _("Missing required specification STARTS."));
1523 if ((seen & 2) == 0)
1524 msg (SE, _("Missing required specification OCCURS."));
1528 /* Enforce ID restriction. */
1529 if ((seen & 16) && !(seen & 8))
1531 msg (SE, _("ID specified without CONTINUED."));
1535 /* Calculate starts_end, cont_end if necessary. */
1536 if (rpd->starts_end.num == 0 && rpd->starts_end.var == NULL)
1537 rpd->starts_end.num = handle_get_record_width (rpd->handle);
1538 if (rpd->cont_end.num == 0 && rpd->starts_end.var == NULL)
1539 rpd->cont_end.num = handle_get_record_width (rpd->handle);
1541 /* Calculate length if possible. */
1542 if ((seen & 4) == 0)
1544 struct dls_var_spec *iter;
1546 for (iter = rpd->first; iter; iter = iter->next)
1548 if (iter->lc > rpd->length.num)
1549 rpd->length.num = iter->lc;
1551 assert (rpd->length.num != 0);
1555 if (!parse_repeating_data (&rpd->first, &rpd->last))
1559 dump_fixed_table (rpd->first, rpd->handle, rpd->last->rec);
1562 struct repeating_data_trns *new_trns;
1564 rpd->h.proc = repeating_data_trns_proc;
1565 rpd->h.free = repeating_data_trns_free;
1567 new_trns = xmalloc (sizeof *new_trns);
1568 memcpy (new_trns, &rpd, sizeof *new_trns);
1569 add_transformation ((struct trns_header *) new_trns);
1572 return lex_end_of_command ();
1575 destroy_dls_var_spec (rpd->first);
1576 free (rpd->id_value);
1580 /* Finds the input format specification for variable V and puts
1581 it in SPEC. Because of the way that DATA LIST is structured,
1582 this is nontrivial. */
1584 find_variable_input_spec (struct variable *v, struct fmt_spec *spec)
1588 for (i = 0; i < n_trns; i++)
1590 struct data_list_pgm *pgm = (struct data_list_pgm *) t_trns[i];
1592 if (pgm->h.proc == data_list_trns_proc)
1594 struct dls_var_spec *iter;
1596 for (iter = pgm->first; iter; iter = iter->next)
1599 *spec = iter->input;
1608 /* Parses a number or a variable name from the syntax file and puts
1609 the results in VALUE. Ensures that the number is at least 1; else
1610 emits an error based on MESSAGE. Returns nonzero only if
1613 parse_num_or_var (struct rpd_num_or_var *value, const char *message)
1618 value->var = parse_variable ();
1619 if (value->var == NULL)
1621 if (value->var->type == ALPHA)
1623 msg (SE, _("String variable not allowed here."));
1627 else if (lex_integer_p ())
1629 value->num = lex_integer ();
1633 msg (SE, _("%s (%d) must be at least 1."), message, value->num);
1639 msg (SE, _("Variable or integer expected for %s."), message);
1645 /* Parses data specifications for repeating data groups, adding
1646 them to the linked list with head FIRST and tail LAST.
1647 Returns nonzero only if successful. */
1649 parse_repeating_data (struct dls_var_spec **first, struct dls_var_spec **last)
1651 struct fixed_parsing_state fx;
1657 while (token != '.')
1659 if (!parse_DATA_LIST_vars (&fx.name, &fx.name_cnt, PV_NONE))
1664 if (!fixed_parse_compatible (&fx, first, last))
1667 else if (token == '(')
1669 if (!fixed_parse_fortran (&fx, first, last))
1674 msg (SE, _("SPSS-like or FORTRAN-like format "
1675 "specification expected after variable names."));
1679 for (i = 0; i < fx.name_cnt; i++)
1685 lex_error (_("expecting end of command"));
1692 for (i = 0; i < fx.name_cnt; i++)
1698 /* Obtains the real value for rpd_num_or_var N in case C and returns
1699 it. The valid range is nonnegative numbers, but numbers outside
1700 this range can be returned and should be handled by the caller as
1703 realize_value (struct rpd_num_or_var *n, struct ccase *c)
1708 assert (n->num == 0);
1711 double v = c->data[n->var->fv].f;
1713 if (v == SYSMIS || v <= INT_MIN || v >= INT_MAX)
1722 /* Parameter record passed to rpd_parse_record(). */
1723 struct rpd_parse_info
1725 struct repeating_data_trns *trns; /* REPEATING DATA transformation. */
1726 const char *line; /* Line being parsed. */
1727 size_t len; /* Line length. */
1728 int beg, end; /* First and last column of first occurrence. */
1729 int ofs; /* Column offset between repeated occurrences. */
1730 struct ccase *c; /* Case to fill in. */
1731 int verify_id; /* Zero to initialize ID, nonzero to verify it. */
1732 int max_occurs; /* Max number of occurrences to parse. */
1735 /* Parses one record of repeated data and outputs corresponding
1736 cases. Returns number of occurrences parsed up to the
1737 maximum specified in INFO. */
1739 rpd_parse_record (const struct rpd_parse_info *info)
1741 struct repeating_data_trns *t = info->trns;
1742 int cur = info->beg;
1745 /* Handle record ID values. */
1748 union value id_temp[MAX_ELEMS_PER_VALUE];
1750 /* Parse record ID into V. */
1754 data_in_finite_line (&di, info->line, info->len, t->id_beg, t->id_end);
1755 di.v = info->verify_id ? id_temp : t->id_value;
1758 di.format = t->id_spec;
1765 && compare_values (id_temp, t->id_value, t->id_var->width) != 0)
1767 char expected_str [MAX_FORMATTED_LEN + 1];
1768 char actual_str [MAX_FORMATTED_LEN + 1];
1770 data_out (expected_str, &t->id_var->print, t->id_value);
1771 expected_str[t->id_var->print.w] = '\0';
1773 data_out (actual_str, &t->id_var->print, id_temp);
1774 actual_str[t->id_var->print.w] = '\0';
1777 _("Encountered mismatched record ID \"%s\" expecting \"%s\"."),
1778 actual_str, expected_str);
1784 /* Iterate over the set of expected occurrences and record each of
1785 them as a separate case. FIXME: We need to execute any
1786 transformations that follow the current one. */
1790 for (occurrences = 0; occurrences < info->max_occurs; )
1792 if (cur + info->ofs > info->end + 1)
1797 struct dls_var_spec *var_spec = t->first;
1799 for (; var_spec; var_spec = var_spec->next)
1801 int fc = var_spec->fc - 1 + cur;
1802 int lc = var_spec->lc - 1 + cur;
1804 if (fc > info->len && !warned && var_spec->input.type != FMT_A)
1809 _("Variable %s starting in column %d extends "
1810 "beyond physical record length of %d."),
1811 var_spec->v->name, fc, info->len);
1817 data_in_finite_line (&di, info->line, info->len, fc, lc);
1818 di.v = &info->c->data[var_spec->fv];
1821 di.format = var_spec->input;
1831 if (!t->write_case (t->wc_data))
1839 /* Reads one set of repetitions of the elements in the REPEATING
1840 DATA structure. Returns -1 on success, -2 on end of file or
1843 repeating_data_trns_proc (struct trns_header *trns, struct ccase *c,
1844 int case_num UNUSED)
1846 struct repeating_data_trns *t = (struct repeating_data_trns *) trns;
1848 char *line; /* Current record. */
1849 int len; /* Length of current record. */
1851 int starts_beg; /* Starting column. */
1852 int starts_end; /* Ending column. */
1853 int occurs; /* Number of repetitions. */
1854 int length; /* Length of each occurrence. */
1855 int cont_beg; /* Starting column for continuation lines. */
1856 int cont_end; /* Ending column for continuation lines. */
1858 int occurs_left; /* Number of occurrences remaining. */
1860 int code; /* Return value from rpd_parse_record(). */
1862 int skip_first_record = 0;
1864 dfm_push (t->handle);
1866 /* Read the current record. */
1867 dfm_bkwd_record (t->handle, 1);
1868 line = dfm_get_record (t->handle, &len);
1871 dfm_fwd_record (t->handle);
1873 /* Calculate occurs, length. */
1874 occurs_left = occurs = realize_value (&t->occurs, c);
1877 tmsg (SE, RPD_ERR, _("Invalid value %d for OCCURS."), occurs);
1880 starts_beg = realize_value (&t->starts_beg, c);
1881 if (starts_beg <= 0)
1883 tmsg (SE, RPD_ERR, _("Beginning column for STARTS (%d) must be "
1888 starts_end = realize_value (&t->starts_end, c);
1889 if (starts_end < starts_beg)
1891 tmsg (SE, RPD_ERR, _("Ending column for STARTS (%d) is less than "
1892 "beginning column (%d)."),
1893 starts_end, starts_beg);
1894 skip_first_record = 1;
1896 length = realize_value (&t->length, c);
1899 tmsg (SE, RPD_ERR, _("Invalid value %d for LENGTH."), length);
1901 occurs = occurs_left = 1;
1903 cont_beg = realize_value (&t->cont_beg, c);
1906 tmsg (SE, RPD_ERR, _("Beginning column for CONTINUED (%d) must be "
1911 cont_end = realize_value (&t->cont_end, c);
1912 if (cont_end < cont_beg)
1914 tmsg (SE, RPD_ERR, _("Ending column for CONTINUED (%d) is less than "
1915 "beginning column (%d)."),
1916 cont_end, cont_beg);
1920 /* Parse the first record. */
1921 if (!skip_first_record)
1923 struct rpd_parse_info info;
1927 info.beg = starts_beg;
1928 info.end = starts_end;
1932 info.max_occurs = occurs_left;
1933 code = rpd_parse_record (&info);
1936 occurs_left -= code;
1938 else if (cont_beg == 0)
1941 /* Make sure, if some occurrences are left, that we have
1942 continuation records. */
1943 if (occurs_left > 0 && cont_beg == 0)
1946 _("Number of repetitions specified on OCCURS (%d) "
1947 "exceed number of repetitions available in "
1948 "space on STARTS (%d), and CONTINUED not specified."),
1949 occurs, (starts_end - starts_beg + 1) / length);
1953 /* Go on to additional records. */
1954 while (occurs_left != 0)
1956 struct rpd_parse_info info;
1958 assert (occurs_left >= 0);
1960 /* Read in another record. */
1961 line = dfm_get_record (t->handle, &len);
1965 _("Unexpected end of file with %d repetitions "
1966 "remaining out of %d."),
1967 occurs_left, occurs);
1970 dfm_fwd_record (t->handle);
1972 /* Parse this record. */
1976 info.beg = cont_beg;
1977 info.end = cont_end;
1981 info.max_occurs = occurs_left;
1982 code = rpd_parse_record (&info);;
1985 occurs_left -= code;
1988 dfm_pop (t->handle);
1990 /* FIXME: This is a kluge until we've implemented multiplexing of
1995 /* Frees a REPEATING DATA transformation. */
1997 repeating_data_trns_free (struct trns_header *rpd_)
1999 struct repeating_data_trns *rpd = (struct repeating_data_trns *) rpd_;
2001 destroy_dls_var_spec (rpd->first);
2002 fh_close_handle (rpd->handle);
2003 free (rpd->id_value);
2006 /* Lets repeating_data_trns_proc() know how to write the cases
2007 that it composes. Not elegant. */
2009 repeating_data_set_write_case (struct trns_header *trns,
2010 write_case_func *write_case,
2011 write_case_data wc_data)
2013 struct repeating_data_trns *t = (struct repeating_data_trns *) trns;
2015 assert (trns->proc == repeating_data_trns_proc);
2016 t->write_case = write_case;
2017 t->wc_data = wc_data;