1 /* PSPP - computes sample statistics.
2 Copyright (C) 1997-9, 2000 Free Software Foundation, Inc.
3 Written by Ben Pfaff <blp@gnu.org>.
5 This program is free software; you can redistribute it and/or
6 modify it under the terms of the GNU General Public License as
7 published by the Free Software Foundation; either version 2 of the
8 License, or (at your option) any later version.
10 This program is distributed in the hope that it will be useful, but
11 WITHOUT ANY WARRANTY; without even the implied warranty of
12 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
13 General Public License for more details.
15 You should have received a copy of the GNU General Public License
16 along with this program; if not, write to the Free Software
17 Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA
21 #include "data-list.h"
30 #include "debug-print.h"
33 #include "file-handle.h"
43 /* Utility function. */
45 /* FIXME: Either REPEATING DATA must be the last transformation, or we
46 must multiplex the transformations that follow (i.e., perform them
47 for every case that we produce from a repetition instance).
48 Currently we do neither. We should do one or the other. */
50 /* Describes how to parse one variable. */
53 struct dls_var_spec *next; /* Next specification in list. */
55 /* Both free and fixed formats. */
56 struct fmt_spec input; /* Input format of this field. */
57 struct variable *v; /* Associated variable. Used only in
58 parsing. Not safe later. */
59 int fv; /* First value in case. */
61 /* Fixed format only. */
62 int rec; /* Record number (1-based). */
63 int fc, lc; /* Column numbers in record. */
65 /* Free format only. */
66 char name[9]; /* Name of variable. */
69 /* Constants for DATA LIST type. */
70 /* Must match table in cmd_data_list(). */
78 /* DATA LIST private data structure. */
83 struct dls_var_spec *first, *last; /* Variable parsing specifications. */
84 struct file_handle *handle; /* Input file, never NULL. */
86 int type; /* A DLS_* constant. */
87 struct variable *end; /* Variable specified on END subcommand. */
88 int eof; /* End of file encountered. */
89 int nrec; /* Number of records. */
90 size_t case_size; /* Case size in bytes. */
93 static int parse_fixed (struct data_list_pgm *);
94 static int parse_free (struct dls_var_spec **, struct dls_var_spec **);
95 static void dump_fixed_table (const struct dls_var_spec *specs,
96 const struct file_handle *handle, int nrec);
97 static void dump_free_table (const struct data_list_pgm *);
98 static void destroy_dls_var_spec (struct dls_var_spec *);
99 static trns_free_func data_list_trns_free;
100 static trns_proc_func data_list_trns_proc;
102 /* Message title for REPEATING DATA. */
103 #define RPD_ERR "REPEATING DATA: "
108 /* DATA LIST program under construction. */
109 struct data_list_pgm *dls;
111 /* 0=print no table, 1=print table. (TABLE subcommand.) */
114 lex_match_id ("DATA");
115 lex_match_id ("LIST");
117 if (!case_source_is_complex (vfm_source))
118 discard_variables ();
120 dls = xmalloc (sizeof *dls);
121 dls->handle = default_handle;
126 dls->first = dls->last = NULL;
130 if (lex_match_id ("FILE"))
133 dls->handle = fh_parse_file_handle ();
136 if (case_source_is_class (vfm_source, &file_type_source_class)
137 && dls->handle != default_handle)
139 msg (SE, _("DATA LIST may not use a different file from "
140 "that specified on its surrounding FILE TYPE."));
144 else if (lex_match_id ("RECORDS"))
148 if (!lex_force_int ())
150 dls->nrec = lex_integer ();
154 else if (lex_match_id ("END"))
158 msg (SE, _("The END subcommand may only be specified once."));
163 if (!lex_force_id ())
165 dls->end = dict_lookup_var (default_dict, tokid);
167 dls->end = dict_create_var_assert (default_dict, tokid, 0);
170 else if (token == T_ID)
172 /* Must match DLS_* constants. */
173 static const char *id[] = {"FIXED", "FREE", "LIST", "NOTABLE",
178 for (p = id; *p; p++)
179 if (lex_id_match (*p, tokid))
194 msg (SE, _("Only one of FIXED, FREE, or LIST may "
211 dls->case_size = dict_get_case_size (default_dict);
212 default_handle = dls->handle;
215 dls->type = DLS_FIXED;
219 if (dls->type == DLS_FREE)
225 if (dls->type == DLS_FIXED)
227 if (!parse_fixed (dls))
230 dump_fixed_table (dls->first, dls->handle, dls->nrec);
234 if (!parse_free (&dls->first, &dls->last))
237 dump_free_table (dls);
240 if (vfm_source != NULL)
242 struct data_list_pgm *new_pgm;
244 dls->h.proc = data_list_trns_proc;
245 dls->h.free = data_list_trns_free;
247 new_pgm = xmalloc (sizeof *new_pgm);
248 memcpy (new_pgm, &dls, sizeof *new_pgm);
249 add_transformation (&new_pgm->h);
252 vfm_source = create_case_source (&data_list_source_class,
258 destroy_dls_var_spec (dls->first);
263 /* Adds SPEC to the linked list with head at FIRST and tail at
266 append_var_spec (struct dls_var_spec **first, struct dls_var_spec **last,
267 struct dls_var_spec *spec)
274 (*last)->next = spec;
278 /* Fixed-format parsing. */
280 /* Used for chaining together fortran-like format specifiers. */
283 struct fmt_list *next;
286 struct fmt_list *down;
289 /* State of parsing DATA LIST. */
290 struct fixed_parsing_state
292 char **name; /* Variable names. */
293 int name_cnt; /* Number of names. */
295 int recno; /* Index of current record. */
296 int sc; /* 1-based column number of starting column for
297 next field to output. */
300 static int fixed_parse_compatible (struct fixed_parsing_state *,
301 struct dls_var_spec **,
302 struct dls_var_spec **);
303 static int fixed_parse_fortran (struct fixed_parsing_state *,
304 struct dls_var_spec **,
305 struct dls_var_spec **);
307 /* Parses all the variable specifications for DATA LIST FIXED,
308 storing them into DLS. Returns nonzero if successful. */
310 parse_fixed (struct data_list_pgm *dls)
312 struct fixed_parsing_state fx;
320 while (lex_match ('/'))
323 if (lex_integer_p ())
325 if (lex_integer () < fx.recno)
327 msg (SE, _("The record number specified, %ld, is "
328 "before the previous record, %d. Data "
329 "fields must be listed in order of "
330 "increasing record number."),
331 lex_integer (), fx.recno - 1);
335 fx.recno = lex_integer ();
341 if (!parse_DATA_LIST_vars (&fx.name, &fx.name_cnt, PV_NONE))
346 if (!fixed_parse_compatible (&fx, &dls->first, &dls->last))
349 else if (token == '(')
351 if (!fixed_parse_fortran (&fx, &dls->first, &dls->last))
356 msg (SE, _("SPSS-like or FORTRAN-like format "
357 "specification expected after variable names."));
361 for (i = 0; i < fx.name_cnt; i++)
365 if (dls->first == NULL)
367 msg (SE, _("At least one variable must be specified."));
370 if (dls->nrec && dls->last->rec > dls->nrec)
372 msg (SE, _("Variables are specified on records that "
373 "should not exist according to RECORDS subcommand."));
377 dls->nrec = dls->last->rec;
380 lex_error (_("expecting end of command"));
386 for (i = 0; i < fx.name_cnt; i++)
392 /* Parses a variable specification in the form 1-10 (A) based on
393 FX and adds specifications to the linked list with head at
394 FIRST and tail at LAST. */
396 fixed_parse_compatible (struct fixed_parsing_state *fx,
397 struct dls_var_spec **first, struct dls_var_spec **last)
399 struct fmt_spec input;
405 if (!lex_force_int ())
410 msg (SE, _("Column positions for fields must be positive."));
416 lex_negative_to_dash ();
419 if (!lex_force_int ())
424 msg (SE, _("Column positions for fields must be positive."));
429 msg (SE, _("The ending column for a field must be "
430 "greater than the starting column."));
439 /* Divide columns evenly. */
440 input.w = (lc - fc + 1) / fx->name_cnt;
441 if ((lc - fc + 1) % fx->name_cnt)
443 msg (SE, _("The %d columns %d-%d "
444 "can't be evenly divided into %d fields."),
445 lc - fc + 1, fc, lc, fx->name_cnt);
449 /* Format specifier. */
452 struct fmt_desc *fdp;
458 input.type = parse_format_specifier_name (&cp, 0);
459 if (input.type == -1)
463 msg (SE, _("A format specifier on this line "
464 "has extra characters on the end."));
474 if (lex_integer_p ())
476 if (lex_integer () < 1)
478 msg (SE, _("The value for number of decimal places "
479 "must be at least 1."));
483 input.d = lex_integer ();
489 fdp = &formats[input.type];
490 if (fdp->n_args < 2 && input.d)
492 msg (SE, _("Input format %s doesn't accept decimal places."),
500 if (!lex_force_match (')'))
508 if (!check_input_specifier (&input))
511 /* Start column for next specification. */
514 /* Width of variables to create. */
515 if (input.type == FMT_A || input.type == FMT_AHEX)
520 /* Create variables and var specs. */
521 for (i = 0; i < fx->name_cnt; i++)
523 struct dls_var_spec *spec;
526 v = dict_create_var (default_dict, fx->name[i], width);
529 convert_fmt_ItoO (&input, &v->print);
531 if (!case_source_is_complex (vfm_source))
536 v = dict_lookup_var_assert (default_dict, fx->name[i]);
537 if (vfm_source == NULL)
539 msg (SE, _("%s is a duplicate variable name."), fx->name[i]);
542 if ((width != 0) != (v->width != 0))
544 msg (SE, _("There is already a variable %s of a "
549 if (width != 0 && width != v->width)
551 msg (SE, _("There is already a string variable %s of a "
552 "different width."), fx->name[i]);
557 spec = xmalloc (sizeof *spec);
561 spec->rec = fx->recno;
562 spec->fc = fc + input.w * i;
563 spec->lc = spec->fc + input.w - 1;
564 append_var_spec (first, last, spec);
569 /* Destroy format list F and, if RECURSE is nonzero, all its
572 destroy_fmt_list (struct fmt_list *f, int recurse)
574 struct fmt_list *next;
579 if (recurse && f->f.type == FMT_DESCEND)
580 destroy_fmt_list (f->down, 1);
585 /* Takes a hierarchically structured fmt_list F as constructed by
586 fixed_parse_fortran(), and flattens it, adding the variable
587 specifications to the linked list with head FIRST and tail
588 LAST. NAME_IDX is used to take values from the list of names
589 in FX; it should initially point to a value of 0. */
591 dump_fmt_list (struct fixed_parsing_state *fx, struct fmt_list *f,
592 struct dls_var_spec **first, struct dls_var_spec **last,
597 for (; f; f = f->next)
598 if (f->f.type == FMT_X)
600 else if (f->f.type == FMT_T)
602 else if (f->f.type == FMT_NEWREC)
604 fx->recno += f->count;
608 for (i = 0; i < f->count; i++)
609 if (f->f.type == FMT_DESCEND)
611 if (!dump_fmt_list (fx, f->down, first, last, name_idx))
616 struct dls_var_spec *spec;
620 if (formats[f->f.type].cat & FCAT_STRING)
624 if (*name_idx >= fx->name_cnt)
626 msg (SE, _("The number of format "
627 "specifications exceeds the given number of "
632 v = dict_create_var (default_dict, fx->name[(*name_idx)++], width);
635 msg (SE, _("%s is a duplicate variable name."), fx->name[i]);
639 if (!case_source_is_complex (vfm_source))
642 spec = xmalloc (sizeof *spec);
646 spec->rec = fx->recno;
648 spec->lc = fx->sc + f->f.w - 1;
649 append_var_spec (first, last, spec);
651 convert_fmt_ItoO (&spec->input, &v->print);
659 /* Recursively parses a FORTRAN-like format specification into
660 the linked list with head FIRST and tail TAIL. LEVEL is the
661 level of recursion, starting from 0. Returns the parsed
662 specification if successful, or a null pointer on failure. */
663 static struct fmt_list *
664 fixed_parse_fortran_internal (struct fixed_parsing_state *fx,
665 struct dls_var_spec **first,
666 struct dls_var_spec **last)
668 struct fmt_list *head = NULL;
669 struct fmt_list *tail = NULL;
671 lex_force_match ('(');
675 struct fmt_list *new = xmalloc (sizeof *new);
678 /* Append new to list. */
686 if (lex_integer_p ())
688 new->count = lex_integer ();
694 /* Parse format specifier. */
697 new->f.type = FMT_DESCEND;
698 new->down = fixed_parse_fortran_internal (fx, first, last);
699 if (new->down == NULL)
702 else if (lex_match ('/'))
703 new->f.type = FMT_NEWREC;
704 else if (!parse_format_specifier (&new->f, 1)
705 || !check_input_specifier (&new->f))
710 lex_force_match (')');
715 destroy_fmt_list (head, 0);
720 /* Parses a FORTRAN-like format specification into the linked
721 list with head FIRST and tail LAST. Returns nonzero if
724 fixed_parse_fortran (struct fixed_parsing_state *fx,
725 struct dls_var_spec **first, struct dls_var_spec **last)
727 struct fmt_list *list;
730 list = fixed_parse_fortran_internal (fx, first, last);
735 dump_fmt_list (fx, list, first, last, &name_idx);
736 destroy_fmt_list (list, 1);
737 if (name_idx < fx->name_cnt)
739 msg (SE, _("There aren't enough format specifications "
740 "to match the number of variable names given."));
747 /* Displays a table giving information on fixed-format variable
748 parsing on DATA LIST. */
749 /* FIXME: The `Columns' column should be divided into three columns,
750 one for the starting column, one for the dash, one for the ending
751 column; then right-justify the starting column and left-justify the
754 dump_fixed_table (const struct dls_var_spec *specs,
755 const struct file_handle *handle, int nrec)
757 const struct dls_var_spec *spec;
760 const char *filename;
763 for (i = 0, spec = specs; spec; spec = spec->next)
765 t = tab_create (4, i + 1, 0);
766 tab_columns (t, TAB_COL_DOWN, 1);
767 tab_headers (t, 0, 0, 1, 0);
768 tab_text (t, 0, 0, TAB_CENTER | TAT_TITLE, _("Variable"));
769 tab_text (t, 1, 0, TAB_CENTER | TAT_TITLE, _("Record"));
770 tab_text (t, 2, 0, TAB_CENTER | TAT_TITLE, _("Columns"));
771 tab_text (t, 3, 0, TAB_CENTER | TAT_TITLE, _("Format"));
772 tab_box (t, TAL_1, TAL_1, TAL_0, TAL_1, 0, 0, 3, i);
773 tab_hline (t, TAL_2, 0, 3, 1);
774 tab_dim (t, tab_natural_dimensions);
776 for (i = 1, spec = specs; spec; spec = spec->next, i++)
778 tab_text (t, 0, i, TAB_LEFT, spec->v->name);
779 tab_text (t, 1, i, TAT_PRINTF, "%d", spec->rec);
780 tab_text (t, 2, i, TAT_PRINTF, "%3d-%3d",
782 tab_text (t, 3, i, TAB_LEFT | TAT_FIX,
783 fmt_to_string (&spec->input));
786 filename = fh_handle_name (handle);
787 if (filename == NULL)
789 buf = local_alloc (strlen (filename) + INT_DIGITS + 80);
790 sprintf (buf, (handle != inline_file
791 ? ngettext ("Reading %d record from file %s.",
792 "Reading %d records from file %s.", nrec)
793 : ngettext ("Reading %d record from the command file.",
794 "Reading %d records from the command file.",
798 tab_title (t, 0, buf);
800 fh_handle_name (NULL);
804 /* Free-format parsing. */
806 /* Parses variable specifications for DATA LIST FREE and adds
807 them to the linked list with head FIRST and tail LAST.
808 Returns nonzero only if successful. */
810 parse_free (struct dls_var_spec **first, struct dls_var_spec **last)
815 struct fmt_spec input, output;
821 if (!parse_DATA_LIST_vars (&name, &name_cnt, PV_NONE))
825 if (!parse_format_specifier (&input, 0)
826 || !check_input_specifier (&input)
827 || !lex_force_match (')'))
829 for (i = 0; i < name_cnt; i++)
834 convert_fmt_ItoO (&input, &output);
845 if (input.type == FMT_A || input.type == FMT_AHEX)
849 for (i = 0; i < name_cnt; i++)
851 struct dls_var_spec *spec;
854 v = dict_create_var (default_dict, name[i], width);
857 msg (SE, _("%s is a duplicate variable name."), name[i]);
860 v->print = v->write = output;
862 if (!case_source_is_complex (vfm_source))
865 spec = xmalloc (sizeof *spec);
869 strcpy (spec->name, name[i]);
870 append_var_spec (first, last, spec);
872 for (i = 0; i < name_cnt; i++)
878 lex_error (_("expecting end of command"));
882 /* Displays a table giving information on free-format variable parsing
885 dump_free_table (const struct data_list_pgm *dls)
891 struct dls_var_spec *spec;
892 for (i = 0, spec = dls->first; spec; spec = spec->next)
896 t = tab_create (2, i + 1, 0);
897 tab_columns (t, TAB_COL_DOWN, 1);
898 tab_headers (t, 0, 0, 1, 0);
899 tab_text (t, 0, 0, TAB_CENTER | TAT_TITLE, _("Variable"));
900 tab_text (t, 1, 0, TAB_CENTER | TAT_TITLE, _("Format"));
901 tab_box (t, TAL_1, TAL_1, TAL_0, TAL_1, 0, 0, 1, i);
902 tab_hline (t, TAL_2, 0, 1, 1);
903 tab_dim (t, tab_natural_dimensions);
906 struct dls_var_spec *spec;
908 for (i = 1, spec = dls->first; spec; spec = spec->next, i++)
910 tab_text (t, 0, i, TAB_LEFT, spec->v->name);
911 tab_text (t, 1, i, TAB_LEFT | TAT_FIX, fmt_to_string (&spec->input));
916 const char *filename;
918 filename = fh_handle_name (dls->handle);
919 if (filename == NULL)
922 (dls->handle != inline_file
923 ? _("Reading free-form data from file %s.")
924 : _("Reading free-form data from the command file.")),
929 fh_handle_name (NULL);
932 /* Input procedure. */
934 /* Extracts a field from the current position in the current record.
935 Fields can be unquoted or quoted with single- or double-quote
936 characters. *RET_LEN is set to the field length, *RET_CP is set to
937 the field itself. After parsing the field, sets the current
938 position in the record to just past the field. Returns 0 on
939 failure or a 1-based column number indicating the beginning of the
942 cut_field (const struct data_list_pgm *dls, char **ret_cp, int *ret_len)
947 cp = dfm_get_record (dls->handle, &len);
953 /* Skip leading whitespace and commas. */
954 while ((isspace ((unsigned char) *cp) || *cp == ',') && cp < ep)
959 /* Three types of fields: quoted with ', quoted with ", unquoted. */
960 if (*cp == '\'' || *cp == '"')
965 while (cp < ep && *cp != quote)
967 *ret_len = cp - *ret_cp;
971 msg (SW, _("Scope of string exceeds line."));
976 while (cp < ep && !isspace ((unsigned char) *cp) && *cp != ',')
978 *ret_len = cp - *ret_cp;
982 int beginning_column;
984 dfm_set_record (dls->handle, *ret_cp);
985 beginning_column = dfm_get_cur_col (dls->handle) + 1;
987 dfm_set_record (dls->handle, cp);
989 return beginning_column;
993 typedef int data_list_read_func (const struct data_list_pgm *, struct ccase *);
994 static data_list_read_func read_from_data_list_fixed;
995 static data_list_read_func read_from_data_list_free;
996 static data_list_read_func read_from_data_list_list;
998 /* Returns the proper function to read the kind of DATA LIST
999 data specified by DLS. */
1000 static data_list_read_func *
1001 get_data_list_read_func (const struct data_list_pgm *dls)
1006 return read_from_data_list_fixed;
1009 return read_from_data_list_free;
1012 return read_from_data_list_list;
1019 /* Reads a case from the data file into C, parsing it according
1020 to fixed-format syntax rules in DLS. Returns -1 on success,
1021 -2 at end of file. */
1023 read_from_data_list_fixed (const struct data_list_pgm *dls,
1026 struct dls_var_spec *var_spec = dls->first;
1029 if (!dfm_get_record (dls->handle, NULL))
1031 for (i = 1; i <= dls->nrec; i++)
1034 char *line = dfm_get_record (dls->handle, &len);
1038 /* Note that this can't occur on the first record. */
1039 msg (SW, _("Partial case of %d of %d records discarded."),
1044 for (; var_spec && i == var_spec->rec; var_spec = var_spec->next)
1048 data_in_finite_line (&di, line, len, var_spec->fc, var_spec->lc);
1049 di.v = &c->data[var_spec->fv];
1051 di.f1 = var_spec->fc;
1052 di.format = var_spec->input;
1057 dfm_fwd_record (dls->handle);
1063 /* Reads a case from the data file into C, parsing it according
1064 to free-format syntax rules in DLS. Returns -1 on success,
1065 -2 at end of file. */
1067 read_from_data_list_free (const struct data_list_pgm *dls,
1070 struct dls_var_spec *var_spec;
1074 for (var_spec = dls->first; var_spec; var_spec = var_spec->next)
1078 /* Cut out a field and read in a new record if necessary. */
1081 column = cut_field (dls, &field, &len);
1085 if (dfm_get_record (dls->handle, NULL))
1086 dfm_fwd_record (dls->handle);
1087 if (!dfm_get_record (dls->handle, NULL))
1089 if (var_spec != dls->first)
1090 msg (SW, _("Partial case discarded. The first variable "
1091 "missing was %s."), var_spec->name);
1101 di.v = &c->data[var_spec->fv];
1104 di.format = var_spec->input;
1111 /* Reads a case from the data file and parses it according to
1112 list-format syntax rules. Returns -1 on success, -2 at end of
1115 read_from_data_list_list (const struct data_list_pgm *dls,
1118 struct dls_var_spec *var_spec;
1122 if (!dfm_get_record (dls->handle, NULL))
1125 for (var_spec = dls->first; var_spec; var_spec = var_spec->next)
1127 /* Cut out a field and check for end-of-line. */
1128 int column = cut_field (dls, &field, &len);
1133 msg (SW, _("Missing value(s) for all variables from %s onward. "
1134 "These will be filled with the system-missing value "
1135 "or blanks, as appropriate."),
1137 for (; var_spec; var_spec = var_spec->next)
1139 int width = get_format_var_width (&var_spec->input);
1141 c->data[var_spec->fv].f = SYSMIS;
1143 memset (c->data[var_spec->fv].s, ' ', width);
1153 di.v = &c->data[var_spec->fv];
1156 di.format = var_spec->input;
1161 dfm_fwd_record (dls->handle);
1165 /* Destroys SPEC. */
1167 destroy_dls_var_spec (struct dls_var_spec *spec)
1169 struct dls_var_spec *next;
1171 while (spec != NULL)
1179 /* Destroys DATA LIST transformation PGM. */
1181 data_list_trns_free (struct trns_header *pgm)
1183 struct data_list_pgm *dls = (struct data_list_pgm *) pgm;
1184 destroy_dls_var_spec (dls->first);
1185 fh_close_handle (dls->handle);
1189 /* Handle DATA LIST transformation T, parsing data into C. */
1191 data_list_trns_proc (struct trns_header *t, struct ccase *c,
1192 int case_num UNUSED)
1194 struct data_list_pgm *dls = (struct data_list_pgm *) t;
1195 data_list_read_func *read_func;
1198 dfm_push (dls->handle);
1200 read_func = get_data_list_read_func (dls);
1201 retval = read_func (dls, c);
1203 /* Handle end of file. */
1206 /* If we already encountered end of file then this is an
1210 msg (SE, _("Attempt to read past end of file."));
1212 dfm_pop (dls->handle);
1216 /* Otherwise simply note it. */
1222 /* If there was an END subcommand handle it. */
1223 if (dls->end != NULL)
1227 c->data[dls->end->fv].f = 1.0;
1231 c->data[dls->end->fv].f = 0.0;
1234 dfm_pop (dls->handle);
1239 /* Reads all the records from the data file and passes them to
1242 data_list_source_read (struct case_source *source,
1244 write_case_func *write_case, write_case_data wc_data)
1246 struct data_list_pgm *dls = source->aux;
1247 data_list_read_func *read_func = get_data_list_read_func (dls);
1249 dfm_push (dls->handle);
1250 while (read_func (dls, c) != -2)
1251 if (!write_case (wc_data))
1253 dfm_pop (dls->handle);
1255 fh_close_handle (dls->handle);
1258 /* Destroys the source's internal data. */
1260 data_list_source_destroy (struct case_source *source)
1262 data_list_trns_free (source->aux);
1265 const struct case_source_class data_list_source_class =
1269 data_list_source_read,
1270 data_list_source_destroy,
1273 /* REPEATING DATA. */
1275 /* Represents a number or a variable. */
1276 struct rpd_num_or_var
1278 int num; /* Value, or 0. */
1279 struct variable *var; /* Variable, if number==0. */
1282 /* REPEATING DATA private data structure. */
1283 struct repeating_data_trns
1285 struct trns_header h;
1286 struct dls_var_spec *first, *last; /* Variable parsing specifications. */
1287 struct file_handle *handle; /* Input file, never NULL. */
1289 struct rpd_num_or_var starts_beg; /* STARTS=, before the dash. */
1290 struct rpd_num_or_var starts_end; /* STARTS=, after the dash. */
1291 struct rpd_num_or_var occurs; /* OCCURS= subcommand. */
1292 struct rpd_num_or_var length; /* LENGTH= subcommand. */
1293 struct rpd_num_or_var cont_beg; /* CONTINUED=, before the dash. */
1294 struct rpd_num_or_var cont_end; /* CONTINUED=, after the dash. */
1296 /* ID subcommand. */
1297 int id_beg, id_end; /* Beginning & end columns. */
1298 struct variable *id_var; /* DATA LIST variable. */
1299 struct fmt_spec id_spec; /* Input format spec. */
1300 union value *id_value; /* ID value. */
1302 write_case_func *write_case;
1303 write_case_data wc_data;
1306 static trns_free_func repeating_data_trns_free;
1307 static int parse_num_or_var (struct rpd_num_or_var *, const char *);
1308 static int parse_repeating_data (struct dls_var_spec **,
1309 struct dls_var_spec **);
1310 static void find_variable_input_spec (struct variable *v,
1311 struct fmt_spec *spec);
1313 /* Parses the REPEATING DATA command. */
1315 cmd_repeating_data (void)
1317 struct repeating_data_trns *rpd;
1319 /* 0=print no table, 1=print table. (TABLE subcommand.) */
1322 /* Bits are set when a particular subcommand has been seen. */
1325 lex_match_id ("REPEATING");
1326 lex_match_id ("DATA");
1328 assert (case_source_is_complex (vfm_source));
1330 rpd = xmalloc (sizeof *rpd);
1331 rpd->handle = default_handle;
1332 rpd->first = rpd->last = NULL;
1333 rpd->starts_beg.num = 0;
1334 rpd->starts_beg.var = NULL;
1335 rpd->starts_end = rpd->occurs = rpd->length = rpd->cont_beg
1336 = rpd->cont_end = rpd->starts_beg;
1337 rpd->id_beg = rpd->id_end = 0;
1339 rpd->id_value = NULL;
1345 if (lex_match_id ("FILE"))
1348 rpd->handle = fh_parse_file_handle ();
1351 if (rpd->handle != default_handle)
1353 msg (SE, _("REPEATING DATA must use the same file as its "
1354 "corresponding DATA LIST or FILE TYPE."));
1358 else if (lex_match_id ("STARTS"))
1363 msg (SE, _("%s subcommand given multiple times."),"STARTS");
1368 if (!parse_num_or_var (&rpd->starts_beg, "STARTS beginning column"))
1371 lex_negative_to_dash ();
1372 if (lex_match ('-'))
1374 if (!parse_num_or_var (&rpd->starts_end, "STARTS ending column"))
1377 /* Otherwise, rpd->starts_end is left uninitialized.
1378 This is okay. We will initialize it later from the
1379 record length of the file. We can't do this now
1380 because we can't be sure that the user has specified
1381 the file handle yet. */
1384 if (rpd->starts_beg.num != 0 && rpd->starts_end.num != 0
1385 && rpd->starts_beg.num > rpd->starts_end.num)
1387 msg (SE, _("STARTS beginning column (%d) exceeds "
1388 "STARTS ending column (%d)."),
1389 rpd->starts_beg.num, rpd->starts_end.num);
1393 else if (lex_match_id ("OCCURS"))
1398 msg (SE, _("%s subcommand given multiple times."),"OCCURS");
1403 if (!parse_num_or_var (&rpd->occurs, "OCCURS"))
1406 else if (lex_match_id ("LENGTH"))
1411 msg (SE, _("%s subcommand given multiple times."),"LENGTH");
1416 if (!parse_num_or_var (&rpd->length, "LENGTH"))
1419 else if (lex_match_id ("CONTINUED"))
1424 msg (SE, _("%s subcommand given multiple times."),"CONTINUED");
1429 if (!lex_match ('/'))
1431 if (!parse_num_or_var (&rpd->cont_beg, "CONTINUED beginning column"))
1434 lex_negative_to_dash ();
1436 && !parse_num_or_var (&rpd->cont_end,
1437 "CONTINUED ending column"))
1440 if (rpd->cont_beg.num != 0 && rpd->cont_end.num != 0
1441 && rpd->cont_beg.num > rpd->cont_end.num)
1443 msg (SE, _("CONTINUED beginning column (%d) exceeds "
1444 "CONTINUED ending column (%d)."),
1445 rpd->cont_beg.num, rpd->cont_end.num);
1450 rpd->cont_beg.num = 1;
1452 else if (lex_match_id ("ID"))
1457 msg (SE, _("%s subcommand given multiple times."),"ID");
1462 if (!lex_force_int ())
1464 if (lex_integer () < 1)
1466 msg (SE, _("ID beginning column (%ld) must be positive."),
1470 rpd->id_beg = lex_integer ();
1473 lex_negative_to_dash ();
1475 if (lex_match ('-'))
1477 if (!lex_force_int ())
1479 if (lex_integer () < 1)
1481 msg (SE, _("ID ending column (%ld) must be positive."),
1485 if (lex_integer () < rpd->id_end)
1487 msg (SE, _("ID ending column (%ld) cannot be less than "
1488 "ID beginning column (%d)."),
1489 lex_integer (), rpd->id_beg);
1493 rpd->id_end = lex_integer ();
1496 else rpd->id_end = rpd->id_beg;
1498 if (!lex_force_match ('='))
1500 rpd->id_var = parse_variable ();
1501 if (rpd->id_var == NULL)
1504 find_variable_input_spec (rpd->id_var, &rpd->id_spec);
1505 rpd->id_value = xmalloc (sizeof *rpd->id_value * rpd->id_var->nv);
1507 else if (lex_match_id ("TABLE"))
1509 else if (lex_match_id ("NOTABLE"))
1511 else if (lex_match_id ("DATA"))
1519 if (!lex_force_match ('/'))
1523 /* Comes here when DATA specification encountered. */
1524 if ((seen & (1 | 2)) != (1 | 2))
1526 if ((seen & 1) == 0)
1527 msg (SE, _("Missing required specification STARTS."));
1528 if ((seen & 2) == 0)
1529 msg (SE, _("Missing required specification OCCURS."));
1533 /* Enforce ID restriction. */
1534 if ((seen & 16) && !(seen & 8))
1536 msg (SE, _("ID specified without CONTINUED."));
1540 /* Calculate starts_end, cont_end if necessary. */
1541 if (rpd->starts_end.num == 0 && rpd->starts_end.var == NULL)
1542 rpd->starts_end.num = fh_record_width (rpd->handle);
1543 if (rpd->cont_end.num == 0 && rpd->starts_end.var == NULL)
1544 rpd->cont_end.num = fh_record_width (rpd->handle);
1546 /* Calculate length if possible. */
1547 if ((seen & 4) == 0)
1549 struct dls_var_spec *iter;
1551 for (iter = rpd->first; iter; iter = iter->next)
1553 if (iter->lc > rpd->length.num)
1554 rpd->length.num = iter->lc;
1556 assert (rpd->length.num != 0);
1560 if (!parse_repeating_data (&rpd->first, &rpd->last))
1564 dump_fixed_table (rpd->first, rpd->handle, rpd->last->rec);
1567 struct repeating_data_trns *new_trns;
1569 rpd->h.proc = repeating_data_trns_proc;
1570 rpd->h.free = repeating_data_trns_free;
1572 new_trns = xmalloc (sizeof *new_trns);
1573 memcpy (new_trns, &rpd, sizeof *new_trns);
1574 add_transformation ((struct trns_header *) new_trns);
1577 return lex_end_of_command ();
1580 destroy_dls_var_spec (rpd->first);
1581 free (rpd->id_value);
1585 /* Finds the input format specification for variable V and puts
1586 it in SPEC. Because of the way that DATA LIST is structured,
1587 this is nontrivial. */
1589 find_variable_input_spec (struct variable *v, struct fmt_spec *spec)
1593 for (i = 0; i < n_trns; i++)
1595 struct data_list_pgm *pgm = (struct data_list_pgm *) t_trns[i];
1597 if (pgm->h.proc == data_list_trns_proc)
1599 struct dls_var_spec *iter;
1601 for (iter = pgm->first; iter; iter = iter->next)
1604 *spec = iter->input;
1613 /* Parses a number or a variable name from the syntax file and puts
1614 the results in VALUE. Ensures that the number is at least 1; else
1615 emits an error based on MESSAGE. Returns nonzero only if
1618 parse_num_or_var (struct rpd_num_or_var *value, const char *message)
1623 value->var = parse_variable ();
1624 if (value->var == NULL)
1626 if (value->var->type == ALPHA)
1628 msg (SE, _("String variable not allowed here."));
1632 else if (lex_integer_p ())
1634 value->num = lex_integer ();
1638 msg (SE, _("%s (%d) must be at least 1."), message, value->num);
1644 msg (SE, _("Variable or integer expected for %s."), message);
1650 /* Parses data specifications for repeating data groups, adding
1651 them to the linked list with head FIRST and tail LAST.
1652 Returns nonzero only if successful. */
1654 parse_repeating_data (struct dls_var_spec **first, struct dls_var_spec **last)
1656 struct fixed_parsing_state fx;
1662 while (token != '.')
1664 if (!parse_DATA_LIST_vars (&fx.name, &fx.name_cnt, PV_NONE))
1669 if (!fixed_parse_compatible (&fx, first, last))
1672 else if (token == '(')
1674 if (!fixed_parse_fortran (&fx, first, last))
1679 msg (SE, _("SPSS-like or FORTRAN-like format "
1680 "specification expected after variable names."));
1684 for (i = 0; i < fx.name_cnt; i++)
1690 lex_error (_("expecting end of command"));
1697 for (i = 0; i < fx.name_cnt; i++)
1703 /* Obtains the real value for rpd_num_or_var N in case C and returns
1704 it. The valid range is nonnegative numbers, but numbers outside
1705 this range can be returned and should be handled by the caller as
1708 realize_value (struct rpd_num_or_var *n, struct ccase *c)
1713 assert (n->num == 0);
1716 double v = c->data[n->var->fv].f;
1718 if (v == SYSMIS || v <= INT_MIN || v >= INT_MAX)
1727 /* Parameter record passed to rpd_parse_record(). */
1728 struct rpd_parse_info
1730 struct repeating_data_trns *trns; /* REPEATING DATA transformation. */
1731 const char *line; /* Line being parsed. */
1732 size_t len; /* Line length. */
1733 int beg, end; /* First and last column of first occurrence. */
1734 int ofs; /* Column offset between repeated occurrences. */
1735 struct ccase *c; /* Case to fill in. */
1736 int verify_id; /* Zero to initialize ID, nonzero to verify it. */
1737 int max_occurs; /* Max number of occurrences to parse. */
1740 /* Parses one record of repeated data and outputs corresponding
1741 cases. Returns number of occurrences parsed up to the
1742 maximum specified in INFO. */
1744 rpd_parse_record (const struct rpd_parse_info *info)
1746 struct repeating_data_trns *t = info->trns;
1747 int cur = info->beg;
1750 /* Handle record ID values. */
1753 union value id_temp[MAX_ELEMS_PER_VALUE];
1755 /* Parse record ID into V. */
1759 data_in_finite_line (&di, info->line, info->len, t->id_beg, t->id_end);
1760 di.v = info->verify_id ? id_temp : t->id_value;
1763 di.format = t->id_spec;
1770 && compare_values (id_temp, t->id_value, t->id_var->width) != 0)
1772 char expected_str [MAX_FORMATTED_LEN + 1];
1773 char actual_str [MAX_FORMATTED_LEN + 1];
1775 data_out (expected_str, &t->id_var->print, t->id_value);
1776 expected_str[t->id_var->print.w] = '\0';
1778 data_out (actual_str, &t->id_var->print, id_temp);
1779 actual_str[t->id_var->print.w] = '\0';
1782 _("Encountered mismatched record ID \"%s\" expecting \"%s\"."),
1783 actual_str, expected_str);
1789 /* Iterate over the set of expected occurrences and record each of
1790 them as a separate case. FIXME: We need to execute any
1791 transformations that follow the current one. */
1795 for (occurrences = 0; occurrences < info->max_occurs; )
1797 if (cur + info->ofs > info->end + 1)
1802 struct dls_var_spec *var_spec = t->first;
1804 for (; var_spec; var_spec = var_spec->next)
1806 int fc = var_spec->fc - 1 + cur;
1807 int lc = var_spec->lc - 1 + cur;
1809 if (fc > info->len && !warned && var_spec->input.type != FMT_A)
1814 _("Variable %s starting in column %d extends "
1815 "beyond physical record length of %d."),
1816 var_spec->v->name, fc, info->len);
1822 data_in_finite_line (&di, info->line, info->len, fc, lc);
1823 di.v = &info->c->data[var_spec->fv];
1826 di.format = var_spec->input;
1836 if (!t->write_case (t->wc_data))
1844 /* Reads one set of repetitions of the elements in the REPEATING
1845 DATA structure. Returns -1 on success, -2 on end of file or
1848 repeating_data_trns_proc (struct trns_header *trns, struct ccase *c,
1849 int case_num UNUSED)
1851 struct repeating_data_trns *t = (struct repeating_data_trns *) trns;
1853 char *line; /* Current record. */
1854 int len; /* Length of current record. */
1856 int starts_beg; /* Starting column. */
1857 int starts_end; /* Ending column. */
1858 int occurs; /* Number of repetitions. */
1859 int length; /* Length of each occurrence. */
1860 int cont_beg; /* Starting column for continuation lines. */
1861 int cont_end; /* Ending column for continuation lines. */
1863 int occurs_left; /* Number of occurrences remaining. */
1865 int code; /* Return value from rpd_parse_record(). */
1867 int skip_first_record = 0;
1869 dfm_push (t->handle);
1871 /* Read the current record. */
1872 dfm_bkwd_record (t->handle, 1);
1873 line = dfm_get_record (t->handle, &len);
1876 dfm_fwd_record (t->handle);
1878 /* Calculate occurs, length. */
1879 occurs_left = occurs = realize_value (&t->occurs, c);
1882 tmsg (SE, RPD_ERR, _("Invalid value %d for OCCURS."), occurs);
1885 starts_beg = realize_value (&t->starts_beg, c);
1886 if (starts_beg <= 0)
1888 tmsg (SE, RPD_ERR, _("Beginning column for STARTS (%d) must be "
1893 starts_end = realize_value (&t->starts_end, c);
1894 if (starts_end < starts_beg)
1896 tmsg (SE, RPD_ERR, _("Ending column for STARTS (%d) is less than "
1897 "beginning column (%d)."),
1898 starts_end, starts_beg);
1899 skip_first_record = 1;
1901 length = realize_value (&t->length, c);
1904 tmsg (SE, RPD_ERR, _("Invalid value %d for LENGTH."), length);
1906 occurs = occurs_left = 1;
1908 cont_beg = realize_value (&t->cont_beg, c);
1911 tmsg (SE, RPD_ERR, _("Beginning column for CONTINUED (%d) must be "
1916 cont_end = realize_value (&t->cont_end, c);
1917 if (cont_end < cont_beg)
1919 tmsg (SE, RPD_ERR, _("Ending column for CONTINUED (%d) is less than "
1920 "beginning column (%d)."),
1921 cont_end, cont_beg);
1925 /* Parse the first record. */
1926 if (!skip_first_record)
1928 struct rpd_parse_info info;
1932 info.beg = starts_beg;
1933 info.end = starts_end;
1937 info.max_occurs = occurs_left;
1938 code = rpd_parse_record (&info);
1941 occurs_left -= code;
1943 else if (cont_beg == 0)
1946 /* Make sure, if some occurrences are left, that we have
1947 continuation records. */
1948 if (occurs_left > 0 && cont_beg == 0)
1951 _("Number of repetitions specified on OCCURS (%d) "
1952 "exceed number of repetitions available in "
1953 "space on STARTS (%d), and CONTINUED not specified."),
1954 occurs, (starts_end - starts_beg + 1) / length);
1958 /* Go on to additional records. */
1959 while (occurs_left != 0)
1961 struct rpd_parse_info info;
1963 assert (occurs_left >= 0);
1965 /* Read in another record. */
1966 line = dfm_get_record (t->handle, &len);
1970 _("Unexpected end of file with %d repetitions "
1971 "remaining out of %d."),
1972 occurs_left, occurs);
1975 dfm_fwd_record (t->handle);
1977 /* Parse this record. */
1981 info.beg = cont_beg;
1982 info.end = cont_end;
1986 info.max_occurs = occurs_left;
1987 code = rpd_parse_record (&info);;
1990 occurs_left -= code;
1993 dfm_pop (t->handle);
1995 /* FIXME: This is a kluge until we've implemented multiplexing of
2000 /* Frees a REPEATING DATA transformation. */
2002 repeating_data_trns_free (struct trns_header *rpd_)
2004 struct repeating_data_trns *rpd = (struct repeating_data_trns *) rpd_;
2006 destroy_dls_var_spec (rpd->first);
2007 fh_close_handle (rpd->handle);
2008 free (rpd->id_value);
2011 /* Lets repeating_data_trns_proc() know how to write the cases
2012 that it composes. Not elegant. */
2014 repeating_data_set_write_case (struct trns_header *trns,
2015 write_case_func *write_case,
2016 write_case_data wc_data)
2018 struct repeating_data_trns *t = (struct repeating_data_trns *) trns;
2020 assert (trns->proc == repeating_data_trns_proc);
2021 t->write_case = write_case;
2022 t->wc_data = wc_data;