1 /* PSPP - computes sample statistics.
2 Copyright (C) 1997-9, 2000 Free Software Foundation, Inc.
3 Written by Ben Pfaff <blp@gnu.org>.
5 This program is free software; you can redistribute it and/or
6 modify it under the terms of the GNU General Public License as
7 published by the Free Software Foundation; either version 2 of the
8 License, or (at your option) any later version.
10 This program is distributed in the hope that it will be useful, but
11 WITHOUT ANY WARRANTY; without even the implied warranty of
12 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
13 General Public License for more details.
15 You should have received a copy of the GNU General Public License
16 along with this program; if not, write to the Free Software
17 Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA
21 #include "data-list.h"
30 #include "debug-print.h"
33 #include "file-handle.h"
43 /* Utility function. */
45 /* FIXME: Either REPEATING DATA must be the last transformation, or we
46 must multiplex the transformations that follow (i.e., perform them
47 for every case that we produce from a repetition instance).
48 Currently we do neither. We should do one or the other. */
50 /* Describes how to parse one variable. */
53 struct dls_var_spec *next; /* Next specification in list. */
55 /* Both free and fixed formats. */
56 struct fmt_spec input; /* Input format of this field. */
57 struct variable *v; /* Associated variable. Used only in
58 parsing. Not safe later. */
59 int fv; /* First value in case. */
61 /* Fixed format only. */
62 int rec; /* Record number (1-based). */
63 int fc, lc; /* Column numbers in record. */
65 /* Free format only. */
66 char name[9]; /* Name of variable. */
69 /* Constants for DATA LIST type. */
70 /* Must match table in cmd_data_list(). */
78 /* DATA LIST private data structure. */
83 struct dls_var_spec *first, *last; /* Variable parsing specifications. */
84 struct file_handle *handle; /* Input file, never NULL. */
86 int type; /* A DLS_* constant. */
87 struct variable *end; /* Variable specified on END subcommand. */
88 int eof; /* End of file encountered. */
89 int nrec; /* Number of records. */
90 size_t case_size; /* Case size in bytes. */
91 int delim; /* Specified delimeter */
94 static int parse_fixed (struct data_list_pgm *);
95 static int parse_free (struct dls_var_spec **, struct dls_var_spec **);
96 static void dump_fixed_table (const struct dls_var_spec *specs,
97 const struct file_handle *handle, int nrec);
98 static void dump_free_table (const struct data_list_pgm *);
99 static void destroy_dls_var_spec (struct dls_var_spec *);
100 static trns_free_func data_list_trns_free;
101 static trns_proc_func data_list_trns_proc;
103 /* Message title for REPEATING DATA. */
104 #define RPD_ERR "REPEATING DATA: "
109 /* DATA LIST program under construction. */
110 struct data_list_pgm *dls;
112 /* 0=print no table, 1=print table. (TABLE subcommand.) */
115 if (!case_source_is_complex (vfm_source))
116 discard_variables ();
118 dls = xmalloc (sizeof *dls);
119 dls->handle = default_handle;
125 dls->first = dls->last = NULL;
129 if (lex_match_id ("FILE"))
132 dls->handle = fh_parse_file_handle ();
135 if (case_source_is_class (vfm_source, &file_type_source_class)
136 && dls->handle != default_handle)
138 msg (SE, _("DATA LIST may not use a different file from "
139 "that specified on its surrounding FILE TYPE."));
143 else if (lex_match_id ("RECORDS"))
147 if (!lex_force_int ())
149 dls->nrec = lex_integer ();
153 else if (lex_match_id ("END"))
157 msg (SE, _("The END subcommand may only be specified once."));
162 if (!lex_force_id ())
164 dls->end = dict_lookup_var (default_dict, tokid);
166 dls->end = dict_create_var_assert (default_dict, tokid, 0);
169 else if (token == T_ID)
171 /* Must match DLS_* constants. */
172 static const char *id[] = {"FIXED", "FREE", "LIST", "NOTABLE",
177 for (p = id; *p; p++)
178 if (lex_id_match (*p, tokid))
193 msg (SE, _("Only one of FIXED, FREE, or LIST may "
203 else if (token=='(') {
205 if (lex_match_id ("TAB")) {
217 dls->case_size = dict_get_case_size (default_dict);
218 default_handle = dls->handle;
221 dls->type = DLS_FIXED;
225 if (dls->type == DLS_FREE)
231 if (dls->type == DLS_FIXED)
233 if (!parse_fixed (dls))
236 dump_fixed_table (dls->first, dls->handle, dls->nrec);
240 if (!parse_free (&dls->first, &dls->last))
243 dump_free_table (dls);
246 if (!dfm_open_for_reading (dls->handle))
249 if (vfm_source != NULL)
251 struct data_list_pgm *new_pgm;
253 dls->h.proc = data_list_trns_proc;
254 dls->h.free = data_list_trns_free;
256 new_pgm = xmalloc (sizeof *new_pgm);
257 memcpy (new_pgm, &dls, sizeof *new_pgm);
258 add_transformation (&new_pgm->h);
261 vfm_source = create_case_source (&data_list_source_class,
267 destroy_dls_var_spec (dls->first);
272 /* Adds SPEC to the linked list with head at FIRST and tail at
275 append_var_spec (struct dls_var_spec **first, struct dls_var_spec **last,
276 struct dls_var_spec *spec)
283 (*last)->next = spec;
287 /* Fixed-format parsing. */
289 /* Used for chaining together fortran-like format specifiers. */
292 struct fmt_list *next;
295 struct fmt_list *down;
298 /* State of parsing DATA LIST. */
299 struct fixed_parsing_state
301 char **name; /* Variable names. */
302 int name_cnt; /* Number of names. */
304 int recno; /* Index of current record. */
305 int sc; /* 1-based column number of starting column for
306 next field to output. */
309 static int fixed_parse_compatible (struct fixed_parsing_state *,
310 struct dls_var_spec **,
311 struct dls_var_spec **);
312 static int fixed_parse_fortran (struct fixed_parsing_state *,
313 struct dls_var_spec **,
314 struct dls_var_spec **);
316 /* Parses all the variable specifications for DATA LIST FIXED,
317 storing them into DLS. Returns nonzero if successful. */
319 parse_fixed (struct data_list_pgm *dls)
321 struct fixed_parsing_state fx;
329 while (lex_match ('/'))
332 if (lex_integer_p ())
334 if (lex_integer () < fx.recno)
336 msg (SE, _("The record number specified, %ld, is "
337 "before the previous record, %d. Data "
338 "fields must be listed in order of "
339 "increasing record number."),
340 lex_integer (), fx.recno - 1);
344 fx.recno = lex_integer ();
350 if (!parse_DATA_LIST_vars (&fx.name, &fx.name_cnt, PV_NONE))
355 if (!fixed_parse_compatible (&fx, &dls->first, &dls->last))
358 else if (token == '(')
360 if (!fixed_parse_fortran (&fx, &dls->first, &dls->last))
365 msg (SE, _("SPSS-like or FORTRAN-like format "
366 "specification expected after variable names."));
370 for (i = 0; i < fx.name_cnt; i++)
374 if (dls->first == NULL)
376 msg (SE, _("At least one variable must be specified."));
379 if (dls->nrec && dls->last->rec > dls->nrec)
381 msg (SE, _("Variables are specified on records that "
382 "should not exist according to RECORDS subcommand."));
386 dls->nrec = dls->last->rec;
389 lex_error (_("expecting end of command"));
395 for (i = 0; i < fx.name_cnt; i++)
401 /* Parses a variable specification in the form 1-10 (A) based on
402 FX and adds specifications to the linked list with head at
403 FIRST and tail at LAST. */
405 fixed_parse_compatible (struct fixed_parsing_state *fx,
406 struct dls_var_spec **first, struct dls_var_spec **last)
408 struct fmt_spec input;
414 if (!lex_force_int ())
419 msg (SE, _("Column positions for fields must be positive."));
425 lex_negative_to_dash ();
428 if (!lex_force_int ())
433 msg (SE, _("Column positions for fields must be positive."));
438 msg (SE, _("The ending column for a field must be "
439 "greater than the starting column."));
448 /* Divide columns evenly. */
449 input.w = (lc - fc + 1) / fx->name_cnt;
450 if ((lc - fc + 1) % fx->name_cnt)
452 msg (SE, _("The %d columns %d-%d "
453 "can't be evenly divided into %d fields."),
454 lc - fc + 1, fc, lc, fx->name_cnt);
458 /* Format specifier. */
461 struct fmt_desc *fdp;
467 input.type = parse_format_specifier_name (&cp, 0);
468 if (input.type == -1)
472 msg (SE, _("A format specifier on this line "
473 "has extra characters on the end."));
483 if (lex_integer_p ())
485 if (lex_integer () < 1)
487 msg (SE, _("The value for number of decimal places "
488 "must be at least 1."));
492 input.d = lex_integer ();
498 fdp = &formats[input.type];
499 if (fdp->n_args < 2 && input.d)
501 msg (SE, _("Input format %s doesn't accept decimal places."),
509 if (!lex_force_match (')'))
517 if (!check_input_specifier (&input))
520 /* Start column for next specification. */
523 /* Width of variables to create. */
524 if (input.type == FMT_A || input.type == FMT_AHEX)
529 /* Create variables and var specs. */
530 for (i = 0; i < fx->name_cnt; i++)
532 struct dls_var_spec *spec;
535 v = dict_create_var (default_dict, fx->name[i], width);
538 convert_fmt_ItoO (&input, &v->print);
540 if (!case_source_is_complex (vfm_source))
545 v = dict_lookup_var_assert (default_dict, fx->name[i]);
546 if (vfm_source == NULL)
548 msg (SE, _("%s is a duplicate variable name."), fx->name[i]);
551 if ((width != 0) != (v->width != 0))
553 msg (SE, _("There is already a variable %s of a "
558 if (width != 0 && width != v->width)
560 msg (SE, _("There is already a string variable %s of a "
561 "different width."), fx->name[i]);
566 spec = xmalloc (sizeof *spec);
570 spec->rec = fx->recno;
571 spec->fc = fc + input.w * i;
572 spec->lc = spec->fc + input.w - 1;
573 append_var_spec (first, last, spec);
578 /* Destroy format list F and, if RECURSE is nonzero, all its
581 destroy_fmt_list (struct fmt_list *f, int recurse)
583 struct fmt_list *next;
588 if (recurse && f->f.type == FMT_DESCEND)
589 destroy_fmt_list (f->down, 1);
594 /* Takes a hierarchically structured fmt_list F as constructed by
595 fixed_parse_fortran(), and flattens it, adding the variable
596 specifications to the linked list with head FIRST and tail
597 LAST. NAME_IDX is used to take values from the list of names
598 in FX; it should initially point to a value of 0. */
600 dump_fmt_list (struct fixed_parsing_state *fx, struct fmt_list *f,
601 struct dls_var_spec **first, struct dls_var_spec **last,
606 for (; f; f = f->next)
607 if (f->f.type == FMT_X)
609 else if (f->f.type == FMT_T)
611 else if (f->f.type == FMT_NEWREC)
613 fx->recno += f->count;
617 for (i = 0; i < f->count; i++)
618 if (f->f.type == FMT_DESCEND)
620 if (!dump_fmt_list (fx, f->down, first, last, name_idx))
625 struct dls_var_spec *spec;
629 if (formats[f->f.type].cat & FCAT_STRING)
633 if (*name_idx >= fx->name_cnt)
635 msg (SE, _("The number of format "
636 "specifications exceeds the given number of "
641 v = dict_create_var (default_dict, fx->name[(*name_idx)++], width);
644 msg (SE, _("%s is a duplicate variable name."), fx->name[i]);
648 if (!case_source_is_complex (vfm_source))
651 spec = xmalloc (sizeof *spec);
655 spec->rec = fx->recno;
657 spec->lc = fx->sc + f->f.w - 1;
658 append_var_spec (first, last, spec);
660 convert_fmt_ItoO (&spec->input, &v->print);
668 /* Recursively parses a FORTRAN-like format specification into
669 the linked list with head FIRST and tail TAIL. LEVEL is the
670 level of recursion, starting from 0. Returns the parsed
671 specification if successful, or a null pointer on failure. */
672 static struct fmt_list *
673 fixed_parse_fortran_internal (struct fixed_parsing_state *fx,
674 struct dls_var_spec **first,
675 struct dls_var_spec **last)
677 struct fmt_list *head = NULL;
678 struct fmt_list *tail = NULL;
680 lex_force_match ('(');
684 struct fmt_list *new = xmalloc (sizeof *new);
687 /* Append new to list. */
695 if (lex_integer_p ())
697 new->count = lex_integer ();
703 /* Parse format specifier. */
706 new->f.type = FMT_DESCEND;
707 new->down = fixed_parse_fortran_internal (fx, first, last);
708 if (new->down == NULL)
711 else if (lex_match ('/'))
712 new->f.type = FMT_NEWREC;
713 else if (!parse_format_specifier (&new->f, 1)
714 || !check_input_specifier (&new->f))
719 lex_force_match (')');
724 destroy_fmt_list (head, 0);
729 /* Parses a FORTRAN-like format specification into the linked
730 list with head FIRST and tail LAST. Returns nonzero if
733 fixed_parse_fortran (struct fixed_parsing_state *fx,
734 struct dls_var_spec **first, struct dls_var_spec **last)
736 struct fmt_list *list;
739 list = fixed_parse_fortran_internal (fx, first, last);
744 dump_fmt_list (fx, list, first, last, &name_idx);
745 destroy_fmt_list (list, 1);
746 if (name_idx < fx->name_cnt)
748 msg (SE, _("There aren't enough format specifications "
749 "to match the number of variable names given."));
756 /* Displays a table giving information on fixed-format variable
757 parsing on DATA LIST. */
758 /* FIXME: The `Columns' column should be divided into three columns,
759 one for the starting column, one for the dash, one for the ending
760 column; then right-justify the starting column and left-justify the
763 dump_fixed_table (const struct dls_var_spec *specs,
764 const struct file_handle *handle, int nrec)
766 const struct dls_var_spec *spec;
769 const char *filename;
772 for (i = 0, spec = specs; spec; spec = spec->next)
774 t = tab_create (4, i + 1, 0);
775 tab_columns (t, TAB_COL_DOWN, 1);
776 tab_headers (t, 0, 0, 1, 0);
777 tab_text (t, 0, 0, TAB_CENTER | TAT_TITLE, _("Variable"));
778 tab_text (t, 1, 0, TAB_CENTER | TAT_TITLE, _("Record"));
779 tab_text (t, 2, 0, TAB_CENTER | TAT_TITLE, _("Columns"));
780 tab_text (t, 3, 0, TAB_CENTER | TAT_TITLE, _("Format"));
781 tab_box (t, TAL_1, TAL_1, TAL_0, TAL_1, 0, 0, 3, i);
782 tab_hline (t, TAL_2, 0, 3, 1);
783 tab_dim (t, tab_natural_dimensions);
785 for (i = 1, spec = specs; spec; spec = spec->next, i++)
787 tab_text (t, 0, i, TAB_LEFT, spec->v->name);
788 tab_text (t, 1, i, TAT_PRINTF, "%d", spec->rec);
789 tab_text (t, 2, i, TAT_PRINTF, "%3d-%3d",
791 tab_text (t, 3, i, TAB_LEFT | TAT_FIX,
792 fmt_to_string (&spec->input));
795 filename = handle_get_filename (handle);
796 if (filename == NULL)
798 buf = local_alloc (strlen (filename) + INT_DIGITS + 80);
799 sprintf (buf, (handle != inline_file
800 ? ngettext ("Reading %d record from file %s.",
801 "Reading %d records from file %s.", nrec)
802 : ngettext ("Reading %d record from the command file.",
803 "Reading %d records from the command file.",
807 tab_title (t, 0, buf);
812 /* Free-format parsing. */
814 /* Parses variable specifications for DATA LIST FREE and adds
815 them to the linked list with head FIRST and tail LAST.
816 Returns nonzero only if successful. */
818 parse_free (struct dls_var_spec **first, struct dls_var_spec **last)
823 struct fmt_spec input, output;
829 if (!parse_DATA_LIST_vars (&name, &name_cnt, PV_NONE))
833 if (!parse_format_specifier (&input, 0)
834 || !check_input_specifier (&input)
835 || !lex_force_match (')'))
837 for (i = 0; i < name_cnt; i++)
842 convert_fmt_ItoO (&input, &output);
850 output = get_format();
853 if (input.type == FMT_A || input.type == FMT_AHEX)
857 for (i = 0; i < name_cnt; i++)
859 struct dls_var_spec *spec;
862 v = dict_create_var (default_dict, name[i], width);
865 msg (SE, _("%s is a duplicate variable name."), name[i]);
868 v->print = v->write = output;
870 if (!case_source_is_complex (vfm_source))
873 spec = xmalloc (sizeof *spec);
877 strcpy (spec->name, name[i]);
878 append_var_spec (first, last, spec);
880 for (i = 0; i < name_cnt; i++)
886 lex_error (_("expecting end of command"));
890 /* Displays a table giving information on free-format variable parsing
893 dump_free_table (const struct data_list_pgm *dls)
899 struct dls_var_spec *spec;
900 for (i = 0, spec = dls->first; spec; spec = spec->next)
904 t = tab_create (2, i + 1, 0);
905 tab_columns (t, TAB_COL_DOWN, 1);
906 tab_headers (t, 0, 0, 1, 0);
907 tab_text (t, 0, 0, TAB_CENTER | TAT_TITLE, _("Variable"));
908 tab_text (t, 1, 0, TAB_CENTER | TAT_TITLE, _("Format"));
909 tab_box (t, TAL_1, TAL_1, TAL_0, TAL_1, 0, 0, 1, i);
910 tab_hline (t, TAL_2, 0, 1, 1);
911 tab_dim (t, tab_natural_dimensions);
914 struct dls_var_spec *spec;
916 for (i = 1, spec = dls->first; spec; spec = spec->next, i++)
918 tab_text (t, 0, i, TAB_LEFT, spec->v->name);
919 tab_text (t, 1, i, TAB_LEFT | TAT_FIX, fmt_to_string (&spec->input));
924 const char *filename;
926 filename = handle_get_filename (dls->handle);
927 if (filename == NULL)
930 (dls->handle != inline_file
931 ? _("Reading free-form data from file %s.")
932 : _("Reading free-form data from the command file.")),
939 /* Input procedure. */
941 /* Extracts a field from the current position in the current record.
942 Fields can be unquoted or quoted with single- or double-quote
943 characters. *RET_LEN is set to the field length, *RET_CP is set to
944 the field itself. After parsing the field, sets the current
945 position in the record to just past the field. Returns 0 on
946 failure or a 1-based column number indicating the beginning of the
949 cut_field (const struct data_list_pgm *dls, char **ret_cp, int *ret_len)
954 cp = dfm_get_record (dls->handle, &len);
959 if (dls->delim != 0) {
960 if (*cp==dls->delim) {
965 /* Skip leading whitespace and commas. */
966 while ((isspace ((unsigned char) *cp) || *cp == ',') && cp < ep)
972 /* Three types of fields: quoted with ', quoted with ", unquoted. */
973 if (*cp == '\'' || *cp == '"')
978 while (cp < ep && *cp != quote)
981 while(cp<ep && *cp!=dls->delim) {
985 *ret_len = cp - *ret_cp;
989 msg (SW, _("Scope of string exceeds line."));
995 while(cp<ep && *cp!=dls->delim) {
1000 while (cp < ep && !isspace ((unsigned char) *cp) && *cp != ',')
1003 *ret_len = cp - *ret_cp;
1007 int beginning_column;
1009 dfm_set_record (dls->handle, *ret_cp);
1010 beginning_column = dfm_get_cur_col (dls->handle) + 1;
1012 dfm_set_record (dls->handle, cp);
1014 return beginning_column;
1018 typedef int data_list_read_func (const struct data_list_pgm *, struct ccase *);
1019 static data_list_read_func read_from_data_list_fixed;
1020 static data_list_read_func read_from_data_list_free;
1021 static data_list_read_func read_from_data_list_list;
1023 /* Returns the proper function to read the kind of DATA LIST
1024 data specified by DLS. */
1025 static data_list_read_func *
1026 get_data_list_read_func (const struct data_list_pgm *dls)
1031 return read_from_data_list_fixed;
1034 return read_from_data_list_free;
1037 return read_from_data_list_list;
1045 /* Reads a case from the data file into C, parsing it according
1046 to fixed-format syntax rules in DLS. Returns -1 on success,
1047 -2 at end of file. */
1049 read_from_data_list_fixed (const struct data_list_pgm *dls,
1052 struct dls_var_spec *var_spec = dls->first;
1055 if (!dfm_get_record (dls->handle, NULL))
1057 for (i = 1; i <= dls->nrec; i++)
1060 char *line = dfm_get_record (dls->handle, &len);
1064 /* Note that this can't occur on the first record. */
1065 msg (SW, _("Partial case of %d of %d records discarded."),
1070 for (; var_spec && i == var_spec->rec; var_spec = var_spec->next)
1074 data_in_finite_line (&di, line, len, var_spec->fc, var_spec->lc);
1075 di.v = &c->data[var_spec->fv];
1077 di.f1 = var_spec->fc;
1078 di.format = var_spec->input;
1083 dfm_fwd_record (dls->handle);
1089 /* Reads a case from the data file into C, parsing it according
1090 to free-format syntax rules in DLS. Returns -1 on success,
1091 -2 at end of file. */
1093 read_from_data_list_free (const struct data_list_pgm *dls,
1096 struct dls_var_spec *var_spec;
1100 for (var_spec = dls->first; var_spec; var_spec = var_spec->next)
1104 /* Cut out a field and read in a new record if necessary. */
1107 column = cut_field (dls, &field, &len);
1111 if (dfm_get_record (dls->handle, NULL))
1112 dfm_fwd_record (dls->handle);
1113 if (!dfm_get_record (dls->handle, NULL))
1115 if (var_spec != dls->first)
1116 msg (SW, _("Partial case discarded. The first variable "
1117 "missing was %s."), var_spec->name);
1127 di.v = &c->data[var_spec->fv];
1130 di.format = var_spec->input;
1137 /* Reads a case from the data file and parses it according to
1138 list-format syntax rules. Returns -1 on success, -2 at end of
1141 read_from_data_list_list (const struct data_list_pgm *dls,
1144 struct dls_var_spec *var_spec;
1148 if (!dfm_get_record (dls->handle, NULL))
1151 for (var_spec = dls->first; var_spec; var_spec = var_spec->next)
1153 /* Cut out a field and check for end-of-line. */
1154 int column = cut_field (dls, &field, &len);
1158 if (get_undefined() )
1159 msg (SW, _("Missing value(s) for all variables from %s onward. "
1160 "These will be filled with the system-missing value "
1161 "or blanks, as appropriate."),
1163 for (; var_spec; var_spec = var_spec->next)
1165 int width = get_format_var_width (&var_spec->input);
1167 c->data[var_spec->fv].f = SYSMIS;
1169 memset (c->data[var_spec->fv].s, ' ', width);
1179 di.v = &c->data[var_spec->fv];
1182 di.format = var_spec->input;
1187 dfm_fwd_record (dls->handle);
1191 /* Destroys SPEC. */
1193 destroy_dls_var_spec (struct dls_var_spec *spec)
1195 struct dls_var_spec *next;
1197 while (spec != NULL)
1205 /* Destroys DATA LIST transformation PGM. */
1207 data_list_trns_free (struct trns_header *pgm)
1209 struct data_list_pgm *dls = (struct data_list_pgm *) pgm;
1210 destroy_dls_var_spec (dls->first);
1211 fh_close_handle (dls->handle);
1215 /* Handle DATA LIST transformation T, parsing data into C. */
1217 data_list_trns_proc (struct trns_header *t, struct ccase *c,
1218 int case_num UNUSED)
1220 struct data_list_pgm *dls = (struct data_list_pgm *) t;
1221 data_list_read_func *read_func;
1224 dfm_push (dls->handle);
1226 read_func = get_data_list_read_func (dls);
1227 retval = read_func (dls, c);
1229 /* Handle end of file. */
1232 /* If we already encountered end of file then this is an
1236 msg (SE, _("Attempt to read past end of file."));
1238 dfm_pop (dls->handle);
1242 /* Otherwise simply note it. */
1248 /* If there was an END subcommand handle it. */
1249 if (dls->end != NULL)
1253 c->data[dls->end->fv].f = 1.0;
1257 c->data[dls->end->fv].f = 0.0;
1260 dfm_pop (dls->handle);
1265 /* Reads all the records from the data file and passes them to
1268 data_list_source_read (struct case_source *source,
1270 write_case_func *write_case, write_case_data wc_data)
1272 struct data_list_pgm *dls = source->aux;
1273 data_list_read_func *read_func = get_data_list_read_func (dls);
1275 dfm_push (dls->handle);
1276 while (read_func (dls, c) != -2)
1277 if (!write_case (wc_data))
1279 dfm_pop (dls->handle);
1281 fh_close_handle (dls->handle);
1284 /* Destroys the source's internal data. */
1286 data_list_source_destroy (struct case_source *source)
1288 data_list_trns_free (source->aux);
1291 const struct case_source_class data_list_source_class =
1295 data_list_source_read,
1296 data_list_source_destroy,
1299 /* REPEATING DATA. */
1301 /* Represents a number or a variable. */
1302 struct rpd_num_or_var
1304 int num; /* Value, or 0. */
1305 struct variable *var; /* Variable, if number==0. */
1308 /* REPEATING DATA private data structure. */
1309 struct repeating_data_trns
1311 struct trns_header h;
1312 struct dls_var_spec *first, *last; /* Variable parsing specifications. */
1313 struct file_handle *handle; /* Input file, never NULL. */
1315 struct rpd_num_or_var starts_beg; /* STARTS=, before the dash. */
1316 struct rpd_num_or_var starts_end; /* STARTS=, after the dash. */
1317 struct rpd_num_or_var occurs; /* OCCURS= subcommand. */
1318 struct rpd_num_or_var length; /* LENGTH= subcommand. */
1319 struct rpd_num_or_var cont_beg; /* CONTINUED=, before the dash. */
1320 struct rpd_num_or_var cont_end; /* CONTINUED=, after the dash. */
1322 /* ID subcommand. */
1323 int id_beg, id_end; /* Beginning & end columns. */
1324 struct variable *id_var; /* DATA LIST variable. */
1325 struct fmt_spec id_spec; /* Input format spec. */
1326 union value *id_value; /* ID value. */
1328 write_case_func *write_case;
1329 write_case_data wc_data;
1332 static trns_free_func repeating_data_trns_free;
1333 static int parse_num_or_var (struct rpd_num_or_var *, const char *);
1334 static int parse_repeating_data (struct dls_var_spec **,
1335 struct dls_var_spec **);
1336 static void find_variable_input_spec (struct variable *v,
1337 struct fmt_spec *spec);
1339 /* Parses the REPEATING DATA command. */
1341 cmd_repeating_data (void)
1343 struct repeating_data_trns *rpd;
1345 /* 0=print no table, 1=print table. (TABLE subcommand.) */
1348 /* Bits are set when a particular subcommand has been seen. */
1351 assert (case_source_is_complex (vfm_source));
1353 rpd = xmalloc (sizeof *rpd);
1354 rpd->handle = default_handle;
1355 rpd->first = rpd->last = NULL;
1356 rpd->starts_beg.num = 0;
1357 rpd->starts_beg.var = NULL;
1358 rpd->starts_end = rpd->occurs = rpd->length = rpd->cont_beg
1359 = rpd->cont_end = rpd->starts_beg;
1360 rpd->id_beg = rpd->id_end = 0;
1362 rpd->id_value = NULL;
1368 if (lex_match_id ("FILE"))
1371 rpd->handle = fh_parse_file_handle ();
1374 if (rpd->handle != default_handle)
1376 msg (SE, _("REPEATING DATA must use the same file as its "
1377 "corresponding DATA LIST or FILE TYPE."));
1381 else if (lex_match_id ("STARTS"))
1386 msg (SE, _("%s subcommand given multiple times."),"STARTS");
1391 if (!parse_num_or_var (&rpd->starts_beg, "STARTS beginning column"))
1394 lex_negative_to_dash ();
1395 if (lex_match ('-'))
1397 if (!parse_num_or_var (&rpd->starts_end, "STARTS ending column"))
1400 /* Otherwise, rpd->starts_end is left uninitialized.
1401 This is okay. We will initialize it later from the
1402 record length of the file. We can't do this now
1403 because we can't be sure that the user has specified
1404 the file handle yet. */
1407 if (rpd->starts_beg.num != 0 && rpd->starts_end.num != 0
1408 && rpd->starts_beg.num > rpd->starts_end.num)
1410 msg (SE, _("STARTS beginning column (%d) exceeds "
1411 "STARTS ending column (%d)."),
1412 rpd->starts_beg.num, rpd->starts_end.num);
1416 else if (lex_match_id ("OCCURS"))
1421 msg (SE, _("%s subcommand given multiple times."),"OCCURS");
1426 if (!parse_num_or_var (&rpd->occurs, "OCCURS"))
1429 else if (lex_match_id ("LENGTH"))
1434 msg (SE, _("%s subcommand given multiple times."),"LENGTH");
1439 if (!parse_num_or_var (&rpd->length, "LENGTH"))
1442 else if (lex_match_id ("CONTINUED"))
1447 msg (SE, _("%s subcommand given multiple times."),"CONTINUED");
1452 if (!lex_match ('/'))
1454 if (!parse_num_or_var (&rpd->cont_beg, "CONTINUED beginning column"))
1457 lex_negative_to_dash ();
1459 && !parse_num_or_var (&rpd->cont_end,
1460 "CONTINUED ending column"))
1463 if (rpd->cont_beg.num != 0 && rpd->cont_end.num != 0
1464 && rpd->cont_beg.num > rpd->cont_end.num)
1466 msg (SE, _("CONTINUED beginning column (%d) exceeds "
1467 "CONTINUED ending column (%d)."),
1468 rpd->cont_beg.num, rpd->cont_end.num);
1473 rpd->cont_beg.num = 1;
1475 else if (lex_match_id ("ID"))
1480 msg (SE, _("%s subcommand given multiple times."),"ID");
1485 if (!lex_force_int ())
1487 if (lex_integer () < 1)
1489 msg (SE, _("ID beginning column (%ld) must be positive."),
1493 rpd->id_beg = lex_integer ();
1496 lex_negative_to_dash ();
1498 if (lex_match ('-'))
1500 if (!lex_force_int ())
1502 if (lex_integer () < 1)
1504 msg (SE, _("ID ending column (%ld) must be positive."),
1508 if (lex_integer () < rpd->id_end)
1510 msg (SE, _("ID ending column (%ld) cannot be less than "
1511 "ID beginning column (%d)."),
1512 lex_integer (), rpd->id_beg);
1516 rpd->id_end = lex_integer ();
1519 else rpd->id_end = rpd->id_beg;
1521 if (!lex_force_match ('='))
1523 rpd->id_var = parse_variable ();
1524 if (rpd->id_var == NULL)
1527 find_variable_input_spec (rpd->id_var, &rpd->id_spec);
1528 rpd->id_value = xmalloc (sizeof *rpd->id_value * rpd->id_var->nv);
1530 else if (lex_match_id ("TABLE"))
1532 else if (lex_match_id ("NOTABLE"))
1534 else if (lex_match_id ("DATA"))
1542 if (!lex_force_match ('/'))
1546 /* Comes here when DATA specification encountered. */
1547 if ((seen & (1 | 2)) != (1 | 2))
1549 if ((seen & 1) == 0)
1550 msg (SE, _("Missing required specification STARTS."));
1551 if ((seen & 2) == 0)
1552 msg (SE, _("Missing required specification OCCURS."));
1556 /* Enforce ID restriction. */
1557 if ((seen & 16) && !(seen & 8))
1559 msg (SE, _("ID specified without CONTINUED."));
1563 /* Calculate starts_end, cont_end if necessary. */
1564 if (rpd->starts_end.num == 0 && rpd->starts_end.var == NULL)
1565 rpd->starts_end.num = handle_get_record_width (rpd->handle);
1566 if (rpd->cont_end.num == 0 && rpd->starts_end.var == NULL)
1567 rpd->cont_end.num = handle_get_record_width (rpd->handle);
1569 /* Calculate length if possible. */
1570 if ((seen & 4) == 0)
1572 struct dls_var_spec *iter;
1574 for (iter = rpd->first; iter; iter = iter->next)
1576 if (iter->lc > rpd->length.num)
1577 rpd->length.num = iter->lc;
1579 assert (rpd->length.num != 0);
1583 if (!parse_repeating_data (&rpd->first, &rpd->last))
1587 dump_fixed_table (rpd->first, rpd->handle, rpd->last->rec);
1590 struct repeating_data_trns *new_trns;
1592 rpd->h.proc = repeating_data_trns_proc;
1593 rpd->h.free = repeating_data_trns_free;
1595 new_trns = xmalloc (sizeof *new_trns);
1596 memcpy (new_trns, &rpd, sizeof *new_trns);
1597 add_transformation ((struct trns_header *) new_trns);
1600 return lex_end_of_command ();
1603 destroy_dls_var_spec (rpd->first);
1604 free (rpd->id_value);
1608 /* Finds the input format specification for variable V and puts
1609 it in SPEC. Because of the way that DATA LIST is structured,
1610 this is nontrivial. */
1612 find_variable_input_spec (struct variable *v, struct fmt_spec *spec)
1616 for (i = 0; i < n_trns; i++)
1618 struct data_list_pgm *pgm = (struct data_list_pgm *) t_trns[i];
1620 if (pgm->h.proc == data_list_trns_proc)
1622 struct dls_var_spec *iter;
1624 for (iter = pgm->first; iter; iter = iter->next)
1627 *spec = iter->input;
1636 /* Parses a number or a variable name from the syntax file and puts
1637 the results in VALUE. Ensures that the number is at least 1; else
1638 emits an error based on MESSAGE. Returns nonzero only if
1641 parse_num_or_var (struct rpd_num_or_var *value, const char *message)
1646 value->var = parse_variable ();
1647 if (value->var == NULL)
1649 if (value->var->type == ALPHA)
1651 msg (SE, _("String variable not allowed here."));
1655 else if (lex_integer_p ())
1657 value->num = lex_integer ();
1661 msg (SE, _("%s (%d) must be at least 1."), message, value->num);
1667 msg (SE, _("Variable or integer expected for %s."), message);
1673 /* Parses data specifications for repeating data groups, adding
1674 them to the linked list with head FIRST and tail LAST.
1675 Returns nonzero only if successful. */
1677 parse_repeating_data (struct dls_var_spec **first, struct dls_var_spec **last)
1679 struct fixed_parsing_state fx;
1685 while (token != '.')
1687 if (!parse_DATA_LIST_vars (&fx.name, &fx.name_cnt, PV_NONE))
1692 if (!fixed_parse_compatible (&fx, first, last))
1695 else if (token == '(')
1697 if (!fixed_parse_fortran (&fx, first, last))
1702 msg (SE, _("SPSS-like or FORTRAN-like format "
1703 "specification expected after variable names."));
1707 for (i = 0; i < fx.name_cnt; i++)
1713 lex_error (_("expecting end of command"));
1720 for (i = 0; i < fx.name_cnt; i++)
1726 /* Obtains the real value for rpd_num_or_var N in case C and returns
1727 it. The valid range is nonnegative numbers, but numbers outside
1728 this range can be returned and should be handled by the caller as
1731 realize_value (struct rpd_num_or_var *n, struct ccase *c)
1736 assert (n->num == 0);
1739 double v = c->data[n->var->fv].f;
1741 if (v == SYSMIS || v <= INT_MIN || v >= INT_MAX)
1750 /* Parameter record passed to rpd_parse_record(). */
1751 struct rpd_parse_info
1753 struct repeating_data_trns *trns; /* REPEATING DATA transformation. */
1754 const char *line; /* Line being parsed. */
1755 size_t len; /* Line length. */
1756 int beg, end; /* First and last column of first occurrence. */
1757 int ofs; /* Column offset between repeated occurrences. */
1758 struct ccase *c; /* Case to fill in. */
1759 int verify_id; /* Zero to initialize ID, nonzero to verify it. */
1760 int max_occurs; /* Max number of occurrences to parse. */
1763 /* Parses one record of repeated data and outputs corresponding
1764 cases. Returns number of occurrences parsed up to the
1765 maximum specified in INFO. */
1767 rpd_parse_record (const struct rpd_parse_info *info)
1769 struct repeating_data_trns *t = info->trns;
1770 int cur = info->beg;
1773 /* Handle record ID values. */
1776 union value id_temp[MAX_ELEMS_PER_VALUE];
1778 /* Parse record ID into V. */
1782 data_in_finite_line (&di, info->line, info->len, t->id_beg, t->id_end);
1783 di.v = info->verify_id ? id_temp : t->id_value;
1786 di.format = t->id_spec;
1793 && compare_values (id_temp, t->id_value, t->id_var->width) != 0)
1795 char expected_str [MAX_FORMATTED_LEN + 1];
1796 char actual_str [MAX_FORMATTED_LEN + 1];
1798 data_out (expected_str, &t->id_var->print, t->id_value);
1799 expected_str[t->id_var->print.w] = '\0';
1801 data_out (actual_str, &t->id_var->print, id_temp);
1802 actual_str[t->id_var->print.w] = '\0';
1805 _("Encountered mismatched record ID \"%s\" expecting \"%s\"."),
1806 actual_str, expected_str);
1812 /* Iterate over the set of expected occurrences and record each of
1813 them as a separate case. FIXME: We need to execute any
1814 transformations that follow the current one. */
1818 for (occurrences = 0; occurrences < info->max_occurs; )
1820 if (cur + info->ofs > info->end + 1)
1825 struct dls_var_spec *var_spec = t->first;
1827 for (; var_spec; var_spec = var_spec->next)
1829 int fc = var_spec->fc - 1 + cur;
1830 int lc = var_spec->lc - 1 + cur;
1832 if (fc > info->len && !warned && var_spec->input.type != FMT_A)
1837 _("Variable %s starting in column %d extends "
1838 "beyond physical record length of %d."),
1839 var_spec->v->name, fc, info->len);
1845 data_in_finite_line (&di, info->line, info->len, fc, lc);
1846 di.v = &info->c->data[var_spec->fv];
1849 di.format = var_spec->input;
1859 if (!t->write_case (t->wc_data))
1867 /* Reads one set of repetitions of the elements in the REPEATING
1868 DATA structure. Returns -1 on success, -2 on end of file or
1871 repeating_data_trns_proc (struct trns_header *trns, struct ccase *c,
1872 int case_num UNUSED)
1874 struct repeating_data_trns *t = (struct repeating_data_trns *) trns;
1876 char *line; /* Current record. */
1877 int len; /* Length of current record. */
1879 int starts_beg; /* Starting column. */
1880 int starts_end; /* Ending column. */
1881 int occurs; /* Number of repetitions. */
1882 int length; /* Length of each occurrence. */
1883 int cont_beg; /* Starting column for continuation lines. */
1884 int cont_end; /* Ending column for continuation lines. */
1886 int occurs_left; /* Number of occurrences remaining. */
1888 int code; /* Return value from rpd_parse_record(). */
1890 int skip_first_record = 0;
1892 dfm_push (t->handle);
1894 /* Read the current record. */
1895 dfm_bkwd_record (t->handle, 1);
1896 line = dfm_get_record (t->handle, &len);
1899 dfm_fwd_record (t->handle);
1901 /* Calculate occurs, length. */
1902 occurs_left = occurs = realize_value (&t->occurs, c);
1905 tmsg (SE, RPD_ERR, _("Invalid value %d for OCCURS."), occurs);
1908 starts_beg = realize_value (&t->starts_beg, c);
1909 if (starts_beg <= 0)
1911 tmsg (SE, RPD_ERR, _("Beginning column for STARTS (%d) must be "
1916 starts_end = realize_value (&t->starts_end, c);
1917 if (starts_end < starts_beg)
1919 tmsg (SE, RPD_ERR, _("Ending column for STARTS (%d) is less than "
1920 "beginning column (%d)."),
1921 starts_end, starts_beg);
1922 skip_first_record = 1;
1924 length = realize_value (&t->length, c);
1927 tmsg (SE, RPD_ERR, _("Invalid value %d for LENGTH."), length);
1929 occurs = occurs_left = 1;
1931 cont_beg = realize_value (&t->cont_beg, c);
1934 tmsg (SE, RPD_ERR, _("Beginning column for CONTINUED (%d) must be "
1939 cont_end = realize_value (&t->cont_end, c);
1940 if (cont_end < cont_beg)
1942 tmsg (SE, RPD_ERR, _("Ending column for CONTINUED (%d) is less than "
1943 "beginning column (%d)."),
1944 cont_end, cont_beg);
1948 /* Parse the first record. */
1949 if (!skip_first_record)
1951 struct rpd_parse_info info;
1955 info.beg = starts_beg;
1956 info.end = starts_end;
1960 info.max_occurs = occurs_left;
1961 code = rpd_parse_record (&info);
1964 occurs_left -= code;
1966 else if (cont_beg == 0)
1969 /* Make sure, if some occurrences are left, that we have
1970 continuation records. */
1971 if (occurs_left > 0 && cont_beg == 0)
1974 _("Number of repetitions specified on OCCURS (%d) "
1975 "exceed number of repetitions available in "
1976 "space on STARTS (%d), and CONTINUED not specified."),
1977 occurs, (starts_end - starts_beg + 1) / length);
1981 /* Go on to additional records. */
1982 while (occurs_left != 0)
1984 struct rpd_parse_info info;
1986 assert (occurs_left >= 0);
1988 /* Read in another record. */
1989 line = dfm_get_record (t->handle, &len);
1993 _("Unexpected end of file with %d repetitions "
1994 "remaining out of %d."),
1995 occurs_left, occurs);
1998 dfm_fwd_record (t->handle);
2000 /* Parse this record. */
2004 info.beg = cont_beg;
2005 info.end = cont_end;
2009 info.max_occurs = occurs_left;
2010 code = rpd_parse_record (&info);;
2013 occurs_left -= code;
2016 dfm_pop (t->handle);
2018 /* FIXME: This is a kluge until we've implemented multiplexing of
2023 /* Frees a REPEATING DATA transformation. */
2025 repeating_data_trns_free (struct trns_header *rpd_)
2027 struct repeating_data_trns *rpd = (struct repeating_data_trns *) rpd_;
2029 destroy_dls_var_spec (rpd->first);
2030 fh_close_handle (rpd->handle);
2031 free (rpd->id_value);
2034 /* Lets repeating_data_trns_proc() know how to write the cases
2035 that it composes. Not elegant. */
2037 repeating_data_set_write_case (struct trns_header *trns,
2038 write_case_func *write_case,
2039 write_case_data wc_data)
2041 struct repeating_data_trns *t = (struct repeating_data_trns *) trns;
2043 assert (trns->proc == repeating_data_trns_proc);
2044 t->write_case = write_case;
2045 t->wc_data = wc_data;