1 /* PSPP - computes sample statistics.
2 Copyright (C) 1997-9, 2000 Free Software Foundation, Inc.
3 Written by Ben Pfaff <blp@gnu.org>.
5 This program is free software; you can redistribute it and/or
6 modify it under the terms of the GNU General Public License as
7 published by the Free Software Foundation; either version 2 of the
8 License, or (at your option) any later version.
10 This program is distributed in the hope that it will be useful, but
11 WITHOUT ANY WARRANTY; without even the implied warranty of
12 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
13 General Public License for more details.
15 You should have received a copy of the GNU General Public License
16 along with this program; if not, write to the Free Software
17 Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA
21 #include "data-list.h"
30 #include "debug-print.h"
33 #include "file-handle.h"
43 /* Utility function. */
45 /* FIXME: Either REPEATING DATA must be the last transformation, or we
46 must multiplex the transformations that follow (i.e., perform them
47 for every case that we produce from a repetition instance).
48 Currently we do neither. We should do one or the other. */
50 /* Describes how to parse one variable. */
53 struct dls_var_spec *next; /* Next specification in list. */
55 /* Both free and fixed formats. */
56 struct fmt_spec input; /* Input format of this field. */
57 struct variable *v; /* Associated variable. Used only in
58 parsing. Not safe later. */
59 int fv; /* First value in case. */
61 /* Fixed format only. */
62 int rec; /* Record number (1-based). */
63 int fc, lc; /* Column numbers in record. */
65 /* Free format only. */
66 char name[9]; /* Name of variable. */
69 /* Constants for DATA LIST type. */
70 /* Must match table in cmd_data_list(). */
78 /* DATA LIST private data structure. */
83 struct dls_var_spec *first, *last; /* Variable parsing specifications. */
84 struct file_handle *handle; /* Input file, never NULL. */
86 int type; /* A DLS_* constant. */
87 struct variable *end; /* Variable specified on END subcommand. */
88 int eof; /* End of file encountered. */
89 int nrec; /* Number of records. */
90 size_t case_size; /* Case size in bytes. */
93 static int parse_fixed (struct data_list_pgm *);
94 static int parse_free (struct dls_var_spec **, struct dls_var_spec **);
95 static void dump_fixed_table (const struct dls_var_spec *specs,
96 const struct file_handle *handle, int nrec);
97 static void dump_free_table (const struct data_list_pgm *);
98 static void destroy_dls_var_spec (struct dls_var_spec *);
99 static trns_free_func data_list_trns_free;
100 static trns_proc_func data_list_trns_proc;
102 /* Message title for REPEATING DATA. */
103 #define RPD_ERR "REPEATING DATA: "
108 /* DATA LIST program under construction. */
109 struct data_list_pgm *dls;
111 /* 0=print no table, 1=print table. (TABLE subcommand.) */
114 if (!case_source_is_complex (vfm_source))
115 discard_variables ();
117 dls = xmalloc (sizeof *dls);
118 dls->handle = default_handle;
123 dls->first = dls->last = NULL;
127 if (lex_match_id ("FILE"))
130 dls->handle = fh_parse_file_handle ();
133 if (case_source_is_class (vfm_source, &file_type_source_class)
134 && dls->handle != default_handle)
136 msg (SE, _("DATA LIST may not use a different file from "
137 "that specified on its surrounding FILE TYPE."));
141 else if (lex_match_id ("RECORDS"))
145 if (!lex_force_int ())
147 dls->nrec = lex_integer ();
151 else if (lex_match_id ("END"))
155 msg (SE, _("The END subcommand may only be specified once."));
160 if (!lex_force_id ())
162 dls->end = dict_lookup_var (default_dict, tokid);
164 dls->end = dict_create_var_assert (default_dict, tokid, 0);
167 else if (token == T_ID)
169 /* Must match DLS_* constants. */
170 static const char *id[] = {"FIXED", "FREE", "LIST", "NOTABLE",
175 for (p = id; *p; p++)
176 if (lex_id_match (*p, tokid))
191 msg (SE, _("Only one of FIXED, FREE, or LIST may "
208 dls->case_size = dict_get_case_size (default_dict);
209 default_handle = dls->handle;
212 dls->type = DLS_FIXED;
216 if (dls->type == DLS_FREE)
222 if (dls->type == DLS_FIXED)
224 if (!parse_fixed (dls))
227 dump_fixed_table (dls->first, dls->handle, dls->nrec);
231 if (!parse_free (&dls->first, &dls->last))
234 dump_free_table (dls);
237 if (!dfm_open_for_reading (dls->handle))
240 if (vfm_source != NULL)
242 struct data_list_pgm *new_pgm;
244 dls->h.proc = data_list_trns_proc;
245 dls->h.free = data_list_trns_free;
247 new_pgm = xmalloc (sizeof *new_pgm);
248 memcpy (new_pgm, &dls, sizeof *new_pgm);
249 add_transformation (&new_pgm->h);
252 vfm_source = create_case_source (&data_list_source_class,
258 destroy_dls_var_spec (dls->first);
263 /* Adds SPEC to the linked list with head at FIRST and tail at
266 append_var_spec (struct dls_var_spec **first, struct dls_var_spec **last,
267 struct dls_var_spec *spec)
274 (*last)->next = spec;
278 /* Fixed-format parsing. */
280 /* Used for chaining together fortran-like format specifiers. */
283 struct fmt_list *next;
286 struct fmt_list *down;
289 /* State of parsing DATA LIST. */
290 struct fixed_parsing_state
292 char **name; /* Variable names. */
293 int name_cnt; /* Number of names. */
295 int recno; /* Index of current record. */
296 int sc; /* 1-based column number of starting column for
297 next field to output. */
300 static int fixed_parse_compatible (struct fixed_parsing_state *,
301 struct dls_var_spec **,
302 struct dls_var_spec **);
303 static int fixed_parse_fortran (struct fixed_parsing_state *,
304 struct dls_var_spec **,
305 struct dls_var_spec **);
307 /* Parses all the variable specifications for DATA LIST FIXED,
308 storing them into DLS. Returns nonzero if successful. */
310 parse_fixed (struct data_list_pgm *dls)
312 struct fixed_parsing_state fx;
320 while (lex_match ('/'))
323 if (lex_integer_p ())
325 if (lex_integer () < fx.recno)
327 msg (SE, _("The record number specified, %ld, is "
328 "before the previous record, %d. Data "
329 "fields must be listed in order of "
330 "increasing record number."),
331 lex_integer (), fx.recno - 1);
335 fx.recno = lex_integer ();
341 if (!parse_DATA_LIST_vars (&fx.name, &fx.name_cnt, PV_NONE))
346 if (!fixed_parse_compatible (&fx, &dls->first, &dls->last))
349 else if (token == '(')
351 if (!fixed_parse_fortran (&fx, &dls->first, &dls->last))
356 msg (SE, _("SPSS-like or FORTRAN-like format "
357 "specification expected after variable names."));
361 for (i = 0; i < fx.name_cnt; i++)
365 if (dls->first == NULL)
367 msg (SE, _("At least one variable must be specified."));
370 if (dls->nrec && dls->last->rec > dls->nrec)
372 msg (SE, _("Variables are specified on records that "
373 "should not exist according to RECORDS subcommand."));
377 dls->nrec = dls->last->rec;
380 lex_error (_("expecting end of command"));
386 for (i = 0; i < fx.name_cnt; i++)
392 /* Parses a variable specification in the form 1-10 (A) based on
393 FX and adds specifications to the linked list with head at
394 FIRST and tail at LAST. */
396 fixed_parse_compatible (struct fixed_parsing_state *fx,
397 struct dls_var_spec **first, struct dls_var_spec **last)
399 struct fmt_spec input;
405 if (!lex_force_int ())
410 msg (SE, _("Column positions for fields must be positive."));
416 lex_negative_to_dash ();
419 if (!lex_force_int ())
424 msg (SE, _("Column positions for fields must be positive."));
429 msg (SE, _("The ending column for a field must be "
430 "greater than the starting column."));
439 /* Divide columns evenly. */
440 input.w = (lc - fc + 1) / fx->name_cnt;
441 if ((lc - fc + 1) % fx->name_cnt)
443 msg (SE, _("The %d columns %d-%d "
444 "can't be evenly divided into %d fields."),
445 lc - fc + 1, fc, lc, fx->name_cnt);
449 /* Format specifier. */
452 struct fmt_desc *fdp;
458 input.type = parse_format_specifier_name (&cp, 0);
459 if (input.type == -1)
463 msg (SE, _("A format specifier on this line "
464 "has extra characters on the end."));
474 if (lex_integer_p ())
476 if (lex_integer () < 1)
478 msg (SE, _("The value for number of decimal places "
479 "must be at least 1."));
483 input.d = lex_integer ();
489 fdp = &formats[input.type];
490 if (fdp->n_args < 2 && input.d)
492 msg (SE, _("Input format %s doesn't accept decimal places."),
500 if (!lex_force_match (')'))
508 if (!check_input_specifier (&input))
511 /* Start column for next specification. */
514 /* Width of variables to create. */
515 if (input.type == FMT_A || input.type == FMT_AHEX)
520 /* Create variables and var specs. */
521 for (i = 0; i < fx->name_cnt; i++)
523 struct dls_var_spec *spec;
526 v = dict_create_var (default_dict, fx->name[i], width);
529 convert_fmt_ItoO (&input, &v->print);
531 if (!case_source_is_complex (vfm_source))
536 v = dict_lookup_var_assert (default_dict, fx->name[i]);
537 if (vfm_source == NULL)
539 msg (SE, _("%s is a duplicate variable name."), fx->name[i]);
542 if ((width != 0) != (v->width != 0))
544 msg (SE, _("There is already a variable %s of a "
549 if (width != 0 && width != v->width)
551 msg (SE, _("There is already a string variable %s of a "
552 "different width."), fx->name[i]);
557 spec = xmalloc (sizeof *spec);
561 spec->rec = fx->recno;
562 spec->fc = fc + input.w * i;
563 spec->lc = spec->fc + input.w - 1;
564 append_var_spec (first, last, spec);
569 /* Destroy format list F and, if RECURSE is nonzero, all its
572 destroy_fmt_list (struct fmt_list *f, int recurse)
574 struct fmt_list *next;
579 if (recurse && f->f.type == FMT_DESCEND)
580 destroy_fmt_list (f->down, 1);
585 /* Takes a hierarchically structured fmt_list F as constructed by
586 fixed_parse_fortran(), and flattens it, adding the variable
587 specifications to the linked list with head FIRST and tail
588 LAST. NAME_IDX is used to take values from the list of names
589 in FX; it should initially point to a value of 0. */
591 dump_fmt_list (struct fixed_parsing_state *fx, struct fmt_list *f,
592 struct dls_var_spec **first, struct dls_var_spec **last,
597 for (; f; f = f->next)
598 if (f->f.type == FMT_X)
600 else if (f->f.type == FMT_T)
602 else if (f->f.type == FMT_NEWREC)
604 fx->recno += f->count;
608 for (i = 0; i < f->count; i++)
609 if (f->f.type == FMT_DESCEND)
611 if (!dump_fmt_list (fx, f->down, first, last, name_idx))
616 struct dls_var_spec *spec;
620 if (formats[f->f.type].cat & FCAT_STRING)
624 if (*name_idx >= fx->name_cnt)
626 msg (SE, _("The number of format "
627 "specifications exceeds the given number of "
632 v = dict_create_var (default_dict, fx->name[(*name_idx)++], width);
635 msg (SE, _("%s is a duplicate variable name."), fx->name[i]);
639 if (!case_source_is_complex (vfm_source))
642 spec = xmalloc (sizeof *spec);
646 spec->rec = fx->recno;
648 spec->lc = fx->sc + f->f.w - 1;
649 append_var_spec (first, last, spec);
651 convert_fmt_ItoO (&spec->input, &v->print);
659 /* Recursively parses a FORTRAN-like format specification into
660 the linked list with head FIRST and tail TAIL. LEVEL is the
661 level of recursion, starting from 0. Returns the parsed
662 specification if successful, or a null pointer on failure. */
663 static struct fmt_list *
664 fixed_parse_fortran_internal (struct fixed_parsing_state *fx,
665 struct dls_var_spec **first,
666 struct dls_var_spec **last)
668 struct fmt_list *head = NULL;
669 struct fmt_list *tail = NULL;
671 lex_force_match ('(');
675 struct fmt_list *new = xmalloc (sizeof *new);
678 /* Append new to list. */
686 if (lex_integer_p ())
688 new->count = lex_integer ();
694 /* Parse format specifier. */
697 new->f.type = FMT_DESCEND;
698 new->down = fixed_parse_fortran_internal (fx, first, last);
699 if (new->down == NULL)
702 else if (lex_match ('/'))
703 new->f.type = FMT_NEWREC;
704 else if (!parse_format_specifier (&new->f, 1)
705 || !check_input_specifier (&new->f))
710 lex_force_match (')');
715 destroy_fmt_list (head, 0);
720 /* Parses a FORTRAN-like format specification into the linked
721 list with head FIRST and tail LAST. Returns nonzero if
724 fixed_parse_fortran (struct fixed_parsing_state *fx,
725 struct dls_var_spec **first, struct dls_var_spec **last)
727 struct fmt_list *list;
730 list = fixed_parse_fortran_internal (fx, first, last);
735 dump_fmt_list (fx, list, first, last, &name_idx);
736 destroy_fmt_list (list, 1);
737 if (name_idx < fx->name_cnt)
739 msg (SE, _("There aren't enough format specifications "
740 "to match the number of variable names given."));
747 /* Displays a table giving information on fixed-format variable
748 parsing on DATA LIST. */
749 /* FIXME: The `Columns' column should be divided into three columns,
750 one for the starting column, one for the dash, one for the ending
751 column; then right-justify the starting column and left-justify the
754 dump_fixed_table (const struct dls_var_spec *specs,
755 const struct file_handle *handle, int nrec)
757 const struct dls_var_spec *spec;
760 const char *filename;
763 for (i = 0, spec = specs; spec; spec = spec->next)
765 t = tab_create (4, i + 1, 0);
766 tab_columns (t, TAB_COL_DOWN, 1);
767 tab_headers (t, 0, 0, 1, 0);
768 tab_text (t, 0, 0, TAB_CENTER | TAT_TITLE, _("Variable"));
769 tab_text (t, 1, 0, TAB_CENTER | TAT_TITLE, _("Record"));
770 tab_text (t, 2, 0, TAB_CENTER | TAT_TITLE, _("Columns"));
771 tab_text (t, 3, 0, TAB_CENTER | TAT_TITLE, _("Format"));
772 tab_box (t, TAL_1, TAL_1, TAL_0, TAL_1, 0, 0, 3, i);
773 tab_hline (t, TAL_2, 0, 3, 1);
774 tab_dim (t, tab_natural_dimensions);
776 for (i = 1, spec = specs; spec; spec = spec->next, i++)
778 tab_text (t, 0, i, TAB_LEFT, spec->v->name);
779 tab_text (t, 1, i, TAT_PRINTF, "%d", spec->rec);
780 tab_text (t, 2, i, TAT_PRINTF, "%3d-%3d",
782 tab_text (t, 3, i, TAB_LEFT | TAT_FIX,
783 fmt_to_string (&spec->input));
786 filename = handle_get_filename (handle);
787 if (filename == NULL)
789 buf = local_alloc (strlen (filename) + INT_DIGITS + 80);
790 sprintf (buf, (handle != inline_file
791 ? ngettext ("Reading %d record from file %s.",
792 "Reading %d records from file %s.", nrec)
793 : ngettext ("Reading %d record from the command file.",
794 "Reading %d records from the command file.",
798 tab_title (t, 0, buf);
803 /* Free-format parsing. */
805 /* Parses variable specifications for DATA LIST FREE and adds
806 them to the linked list with head FIRST and tail LAST.
807 Returns nonzero only if successful. */
809 parse_free (struct dls_var_spec **first, struct dls_var_spec **last)
814 struct fmt_spec input, output;
820 if (!parse_DATA_LIST_vars (&name, &name_cnt, PV_NONE))
824 if (!parse_format_specifier (&input, 0)
825 || !check_input_specifier (&input)
826 || !lex_force_match (')'))
828 for (i = 0; i < name_cnt; i++)
833 convert_fmt_ItoO (&input, &output);
841 output = get_format();
844 if (input.type == FMT_A || input.type == FMT_AHEX)
848 for (i = 0; i < name_cnt; i++)
850 struct dls_var_spec *spec;
853 v = dict_create_var (default_dict, name[i], width);
856 msg (SE, _("%s is a duplicate variable name."), name[i]);
859 v->print = v->write = output;
861 if (!case_source_is_complex (vfm_source))
864 spec = xmalloc (sizeof *spec);
868 strcpy (spec->name, name[i]);
869 append_var_spec (first, last, spec);
871 for (i = 0; i < name_cnt; i++)
877 lex_error (_("expecting end of command"));
881 /* Displays a table giving information on free-format variable parsing
884 dump_free_table (const struct data_list_pgm *dls)
890 struct dls_var_spec *spec;
891 for (i = 0, spec = dls->first; spec; spec = spec->next)
895 t = tab_create (2, i + 1, 0);
896 tab_columns (t, TAB_COL_DOWN, 1);
897 tab_headers (t, 0, 0, 1, 0);
898 tab_text (t, 0, 0, TAB_CENTER | TAT_TITLE, _("Variable"));
899 tab_text (t, 1, 0, TAB_CENTER | TAT_TITLE, _("Format"));
900 tab_box (t, TAL_1, TAL_1, TAL_0, TAL_1, 0, 0, 1, i);
901 tab_hline (t, TAL_2, 0, 1, 1);
902 tab_dim (t, tab_natural_dimensions);
905 struct dls_var_spec *spec;
907 for (i = 1, spec = dls->first; spec; spec = spec->next, i++)
909 tab_text (t, 0, i, TAB_LEFT, spec->v->name);
910 tab_text (t, 1, i, TAB_LEFT | TAT_FIX, fmt_to_string (&spec->input));
915 const char *filename;
917 filename = handle_get_filename (dls->handle);
918 if (filename == NULL)
921 (dls->handle != inline_file
922 ? _("Reading free-form data from file %s.")
923 : _("Reading free-form data from the command file.")),
930 /* Input procedure. */
932 /* Extracts a field from the current position in the current record.
933 Fields can be unquoted or quoted with single- or double-quote
934 characters. *RET_LEN is set to the field length, *RET_CP is set to
935 the field itself. After parsing the field, sets the current
936 position in the record to just past the field. Returns 0 on
937 failure or a 1-based column number indicating the beginning of the
940 cut_field (const struct data_list_pgm *dls, char **ret_cp, int *ret_len)
945 cp = dfm_get_record (dls->handle, &len);
951 /* Skip leading whitespace and commas. */
952 while ((isspace ((unsigned char) *cp) || *cp == ',') && cp < ep)
957 /* Three types of fields: quoted with ', quoted with ", unquoted. */
958 if (*cp == '\'' || *cp == '"')
963 while (cp < ep && *cp != quote)
965 *ret_len = cp - *ret_cp;
969 msg (SW, _("Scope of string exceeds line."));
974 while (cp < ep && !isspace ((unsigned char) *cp) && *cp != ',')
976 *ret_len = cp - *ret_cp;
980 int beginning_column;
982 dfm_set_record (dls->handle, *ret_cp);
983 beginning_column = dfm_get_cur_col (dls->handle) + 1;
985 dfm_set_record (dls->handle, cp);
987 return beginning_column;
991 typedef int data_list_read_func (const struct data_list_pgm *, struct ccase *);
992 static data_list_read_func read_from_data_list_fixed;
993 static data_list_read_func read_from_data_list_free;
994 static data_list_read_func read_from_data_list_list;
996 /* Returns the proper function to read the kind of DATA LIST
997 data specified by DLS. */
998 static data_list_read_func *
999 get_data_list_read_func (const struct data_list_pgm *dls)
1004 return read_from_data_list_fixed;
1007 return read_from_data_list_free;
1010 return read_from_data_list_list;
1018 /* Reads a case from the data file into C, parsing it according
1019 to fixed-format syntax rules in DLS. Returns -1 on success,
1020 -2 at end of file. */
1022 read_from_data_list_fixed (const struct data_list_pgm *dls,
1025 struct dls_var_spec *var_spec = dls->first;
1028 if (!dfm_get_record (dls->handle, NULL))
1030 for (i = 1; i <= dls->nrec; i++)
1033 char *line = dfm_get_record (dls->handle, &len);
1037 /* Note that this can't occur on the first record. */
1038 msg (SW, _("Partial case of %d of %d records discarded."),
1043 for (; var_spec && i == var_spec->rec; var_spec = var_spec->next)
1047 data_in_finite_line (&di, line, len, var_spec->fc, var_spec->lc);
1048 di.v = &c->data[var_spec->fv];
1050 di.f1 = var_spec->fc;
1051 di.format = var_spec->input;
1056 dfm_fwd_record (dls->handle);
1062 /* Reads a case from the data file into C, parsing it according
1063 to free-format syntax rules in DLS. Returns -1 on success,
1064 -2 at end of file. */
1066 read_from_data_list_free (const struct data_list_pgm *dls,
1069 struct dls_var_spec *var_spec;
1073 for (var_spec = dls->first; var_spec; var_spec = var_spec->next)
1077 /* Cut out a field and read in a new record if necessary. */
1080 column = cut_field (dls, &field, &len);
1084 if (dfm_get_record (dls->handle, NULL))
1085 dfm_fwd_record (dls->handle);
1086 if (!dfm_get_record (dls->handle, NULL))
1088 if (var_spec != dls->first)
1089 msg (SW, _("Partial case discarded. The first variable "
1090 "missing was %s."), var_spec->name);
1100 di.v = &c->data[var_spec->fv];
1103 di.format = var_spec->input;
1110 /* Reads a case from the data file and parses it according to
1111 list-format syntax rules. Returns -1 on success, -2 at end of
1114 read_from_data_list_list (const struct data_list_pgm *dls,
1117 struct dls_var_spec *var_spec;
1121 if (!dfm_get_record (dls->handle, NULL))
1124 for (var_spec = dls->first; var_spec; var_spec = var_spec->next)
1126 /* Cut out a field and check for end-of-line. */
1127 int column = cut_field (dls, &field, &len);
1131 if (get_undefined() )
1132 msg (SW, _("Missing value(s) for all variables from %s onward. "
1133 "These will be filled with the system-missing value "
1134 "or blanks, as appropriate."),
1136 for (; var_spec; var_spec = var_spec->next)
1138 int width = get_format_var_width (&var_spec->input);
1140 c->data[var_spec->fv].f = SYSMIS;
1142 memset (c->data[var_spec->fv].s, ' ', width);
1152 di.v = &c->data[var_spec->fv];
1155 di.format = var_spec->input;
1160 dfm_fwd_record (dls->handle);
1164 /* Destroys SPEC. */
1166 destroy_dls_var_spec (struct dls_var_spec *spec)
1168 struct dls_var_spec *next;
1170 while (spec != NULL)
1178 /* Destroys DATA LIST transformation PGM. */
1180 data_list_trns_free (struct trns_header *pgm)
1182 struct data_list_pgm *dls = (struct data_list_pgm *) pgm;
1183 destroy_dls_var_spec (dls->first);
1184 fh_close_handle (dls->handle);
1188 /* Handle DATA LIST transformation T, parsing data into C. */
1190 data_list_trns_proc (struct trns_header *t, struct ccase *c,
1191 int case_num UNUSED)
1193 struct data_list_pgm *dls = (struct data_list_pgm *) t;
1194 data_list_read_func *read_func;
1197 dfm_push (dls->handle);
1199 read_func = get_data_list_read_func (dls);
1200 retval = read_func (dls, c);
1202 /* Handle end of file. */
1205 /* If we already encountered end of file then this is an
1209 msg (SE, _("Attempt to read past end of file."));
1211 dfm_pop (dls->handle);
1215 /* Otherwise simply note it. */
1221 /* If there was an END subcommand handle it. */
1222 if (dls->end != NULL)
1226 c->data[dls->end->fv].f = 1.0;
1230 c->data[dls->end->fv].f = 0.0;
1233 dfm_pop (dls->handle);
1238 /* Reads all the records from the data file and passes them to
1241 data_list_source_read (struct case_source *source,
1243 write_case_func *write_case, write_case_data wc_data)
1245 struct data_list_pgm *dls = source->aux;
1246 data_list_read_func *read_func = get_data_list_read_func (dls);
1248 dfm_push (dls->handle);
1249 while (read_func (dls, c) != -2)
1250 if (!write_case (wc_data))
1252 dfm_pop (dls->handle);
1254 fh_close_handle (dls->handle);
1257 /* Destroys the source's internal data. */
1259 data_list_source_destroy (struct case_source *source)
1261 data_list_trns_free (source->aux);
1264 const struct case_source_class data_list_source_class =
1268 data_list_source_read,
1269 data_list_source_destroy,
1272 /* REPEATING DATA. */
1274 /* Represents a number or a variable. */
1275 struct rpd_num_or_var
1277 int num; /* Value, or 0. */
1278 struct variable *var; /* Variable, if number==0. */
1281 /* REPEATING DATA private data structure. */
1282 struct repeating_data_trns
1284 struct trns_header h;
1285 struct dls_var_spec *first, *last; /* Variable parsing specifications. */
1286 struct file_handle *handle; /* Input file, never NULL. */
1288 struct rpd_num_or_var starts_beg; /* STARTS=, before the dash. */
1289 struct rpd_num_or_var starts_end; /* STARTS=, after the dash. */
1290 struct rpd_num_or_var occurs; /* OCCURS= subcommand. */
1291 struct rpd_num_or_var length; /* LENGTH= subcommand. */
1292 struct rpd_num_or_var cont_beg; /* CONTINUED=, before the dash. */
1293 struct rpd_num_or_var cont_end; /* CONTINUED=, after the dash. */
1295 /* ID subcommand. */
1296 int id_beg, id_end; /* Beginning & end columns. */
1297 struct variable *id_var; /* DATA LIST variable. */
1298 struct fmt_spec id_spec; /* Input format spec. */
1299 union value *id_value; /* ID value. */
1301 write_case_func *write_case;
1302 write_case_data wc_data;
1305 static trns_free_func repeating_data_trns_free;
1306 static int parse_num_or_var (struct rpd_num_or_var *, const char *);
1307 static int parse_repeating_data (struct dls_var_spec **,
1308 struct dls_var_spec **);
1309 static void find_variable_input_spec (struct variable *v,
1310 struct fmt_spec *spec);
1312 /* Parses the REPEATING DATA command. */
1314 cmd_repeating_data (void)
1316 struct repeating_data_trns *rpd;
1318 /* 0=print no table, 1=print table. (TABLE subcommand.) */
1321 /* Bits are set when a particular subcommand has been seen. */
1324 assert (case_source_is_complex (vfm_source));
1326 rpd = xmalloc (sizeof *rpd);
1327 rpd->handle = default_handle;
1328 rpd->first = rpd->last = NULL;
1329 rpd->starts_beg.num = 0;
1330 rpd->starts_beg.var = NULL;
1331 rpd->starts_end = rpd->occurs = rpd->length = rpd->cont_beg
1332 = rpd->cont_end = rpd->starts_beg;
1333 rpd->id_beg = rpd->id_end = 0;
1335 rpd->id_value = NULL;
1341 if (lex_match_id ("FILE"))
1344 rpd->handle = fh_parse_file_handle ();
1347 if (rpd->handle != default_handle)
1349 msg (SE, _("REPEATING DATA must use the same file as its "
1350 "corresponding DATA LIST or FILE TYPE."));
1354 else if (lex_match_id ("STARTS"))
1359 msg (SE, _("%s subcommand given multiple times."),"STARTS");
1364 if (!parse_num_or_var (&rpd->starts_beg, "STARTS beginning column"))
1367 lex_negative_to_dash ();
1368 if (lex_match ('-'))
1370 if (!parse_num_or_var (&rpd->starts_end, "STARTS ending column"))
1373 /* Otherwise, rpd->starts_end is left uninitialized.
1374 This is okay. We will initialize it later from the
1375 record length of the file. We can't do this now
1376 because we can't be sure that the user has specified
1377 the file handle yet. */
1380 if (rpd->starts_beg.num != 0 && rpd->starts_end.num != 0
1381 && rpd->starts_beg.num > rpd->starts_end.num)
1383 msg (SE, _("STARTS beginning column (%d) exceeds "
1384 "STARTS ending column (%d)."),
1385 rpd->starts_beg.num, rpd->starts_end.num);
1389 else if (lex_match_id ("OCCURS"))
1394 msg (SE, _("%s subcommand given multiple times."),"OCCURS");
1399 if (!parse_num_or_var (&rpd->occurs, "OCCURS"))
1402 else if (lex_match_id ("LENGTH"))
1407 msg (SE, _("%s subcommand given multiple times."),"LENGTH");
1412 if (!parse_num_or_var (&rpd->length, "LENGTH"))
1415 else if (lex_match_id ("CONTINUED"))
1420 msg (SE, _("%s subcommand given multiple times."),"CONTINUED");
1425 if (!lex_match ('/'))
1427 if (!parse_num_or_var (&rpd->cont_beg, "CONTINUED beginning column"))
1430 lex_negative_to_dash ();
1432 && !parse_num_or_var (&rpd->cont_end,
1433 "CONTINUED ending column"))
1436 if (rpd->cont_beg.num != 0 && rpd->cont_end.num != 0
1437 && rpd->cont_beg.num > rpd->cont_end.num)
1439 msg (SE, _("CONTINUED beginning column (%d) exceeds "
1440 "CONTINUED ending column (%d)."),
1441 rpd->cont_beg.num, rpd->cont_end.num);
1446 rpd->cont_beg.num = 1;
1448 else if (lex_match_id ("ID"))
1453 msg (SE, _("%s subcommand given multiple times."),"ID");
1458 if (!lex_force_int ())
1460 if (lex_integer () < 1)
1462 msg (SE, _("ID beginning column (%ld) must be positive."),
1466 rpd->id_beg = lex_integer ();
1469 lex_negative_to_dash ();
1471 if (lex_match ('-'))
1473 if (!lex_force_int ())
1475 if (lex_integer () < 1)
1477 msg (SE, _("ID ending column (%ld) must be positive."),
1481 if (lex_integer () < rpd->id_end)
1483 msg (SE, _("ID ending column (%ld) cannot be less than "
1484 "ID beginning column (%d)."),
1485 lex_integer (), rpd->id_beg);
1489 rpd->id_end = lex_integer ();
1492 else rpd->id_end = rpd->id_beg;
1494 if (!lex_force_match ('='))
1496 rpd->id_var = parse_variable ();
1497 if (rpd->id_var == NULL)
1500 find_variable_input_spec (rpd->id_var, &rpd->id_spec);
1501 rpd->id_value = xmalloc (sizeof *rpd->id_value * rpd->id_var->nv);
1503 else if (lex_match_id ("TABLE"))
1505 else if (lex_match_id ("NOTABLE"))
1507 else if (lex_match_id ("DATA"))
1515 if (!lex_force_match ('/'))
1519 /* Comes here when DATA specification encountered. */
1520 if ((seen & (1 | 2)) != (1 | 2))
1522 if ((seen & 1) == 0)
1523 msg (SE, _("Missing required specification STARTS."));
1524 if ((seen & 2) == 0)
1525 msg (SE, _("Missing required specification OCCURS."));
1529 /* Enforce ID restriction. */
1530 if ((seen & 16) && !(seen & 8))
1532 msg (SE, _("ID specified without CONTINUED."));
1536 /* Calculate starts_end, cont_end if necessary. */
1537 if (rpd->starts_end.num == 0 && rpd->starts_end.var == NULL)
1538 rpd->starts_end.num = handle_get_record_width (rpd->handle);
1539 if (rpd->cont_end.num == 0 && rpd->starts_end.var == NULL)
1540 rpd->cont_end.num = handle_get_record_width (rpd->handle);
1542 /* Calculate length if possible. */
1543 if ((seen & 4) == 0)
1545 struct dls_var_spec *iter;
1547 for (iter = rpd->first; iter; iter = iter->next)
1549 if (iter->lc > rpd->length.num)
1550 rpd->length.num = iter->lc;
1552 assert (rpd->length.num != 0);
1556 if (!parse_repeating_data (&rpd->first, &rpd->last))
1560 dump_fixed_table (rpd->first, rpd->handle, rpd->last->rec);
1563 struct repeating_data_trns *new_trns;
1565 rpd->h.proc = repeating_data_trns_proc;
1566 rpd->h.free = repeating_data_trns_free;
1568 new_trns = xmalloc (sizeof *new_trns);
1569 memcpy (new_trns, &rpd, sizeof *new_trns);
1570 add_transformation ((struct trns_header *) new_trns);
1573 return lex_end_of_command ();
1576 destroy_dls_var_spec (rpd->first);
1577 free (rpd->id_value);
1581 /* Finds the input format specification for variable V and puts
1582 it in SPEC. Because of the way that DATA LIST is structured,
1583 this is nontrivial. */
1585 find_variable_input_spec (struct variable *v, struct fmt_spec *spec)
1589 for (i = 0; i < n_trns; i++)
1591 struct data_list_pgm *pgm = (struct data_list_pgm *) t_trns[i];
1593 if (pgm->h.proc == data_list_trns_proc)
1595 struct dls_var_spec *iter;
1597 for (iter = pgm->first; iter; iter = iter->next)
1600 *spec = iter->input;
1609 /* Parses a number or a variable name from the syntax file and puts
1610 the results in VALUE. Ensures that the number is at least 1; else
1611 emits an error based on MESSAGE. Returns nonzero only if
1614 parse_num_or_var (struct rpd_num_or_var *value, const char *message)
1619 value->var = parse_variable ();
1620 if (value->var == NULL)
1622 if (value->var->type == ALPHA)
1624 msg (SE, _("String variable not allowed here."));
1628 else if (lex_integer_p ())
1630 value->num = lex_integer ();
1634 msg (SE, _("%s (%d) must be at least 1."), message, value->num);
1640 msg (SE, _("Variable or integer expected for %s."), message);
1646 /* Parses data specifications for repeating data groups, adding
1647 them to the linked list with head FIRST and tail LAST.
1648 Returns nonzero only if successful. */
1650 parse_repeating_data (struct dls_var_spec **first, struct dls_var_spec **last)
1652 struct fixed_parsing_state fx;
1658 while (token != '.')
1660 if (!parse_DATA_LIST_vars (&fx.name, &fx.name_cnt, PV_NONE))
1665 if (!fixed_parse_compatible (&fx, first, last))
1668 else if (token == '(')
1670 if (!fixed_parse_fortran (&fx, first, last))
1675 msg (SE, _("SPSS-like or FORTRAN-like format "
1676 "specification expected after variable names."));
1680 for (i = 0; i < fx.name_cnt; i++)
1686 lex_error (_("expecting end of command"));
1693 for (i = 0; i < fx.name_cnt; i++)
1699 /* Obtains the real value for rpd_num_or_var N in case C and returns
1700 it. The valid range is nonnegative numbers, but numbers outside
1701 this range can be returned and should be handled by the caller as
1704 realize_value (struct rpd_num_or_var *n, struct ccase *c)
1709 assert (n->num == 0);
1712 double v = c->data[n->var->fv].f;
1714 if (v == SYSMIS || v <= INT_MIN || v >= INT_MAX)
1723 /* Parameter record passed to rpd_parse_record(). */
1724 struct rpd_parse_info
1726 struct repeating_data_trns *trns; /* REPEATING DATA transformation. */
1727 const char *line; /* Line being parsed. */
1728 size_t len; /* Line length. */
1729 int beg, end; /* First and last column of first occurrence. */
1730 int ofs; /* Column offset between repeated occurrences. */
1731 struct ccase *c; /* Case to fill in. */
1732 int verify_id; /* Zero to initialize ID, nonzero to verify it. */
1733 int max_occurs; /* Max number of occurrences to parse. */
1736 /* Parses one record of repeated data and outputs corresponding
1737 cases. Returns number of occurrences parsed up to the
1738 maximum specified in INFO. */
1740 rpd_parse_record (const struct rpd_parse_info *info)
1742 struct repeating_data_trns *t = info->trns;
1743 int cur = info->beg;
1746 /* Handle record ID values. */
1749 union value id_temp[MAX_ELEMS_PER_VALUE];
1751 /* Parse record ID into V. */
1755 data_in_finite_line (&di, info->line, info->len, t->id_beg, t->id_end);
1756 di.v = info->verify_id ? id_temp : t->id_value;
1759 di.format = t->id_spec;
1766 && compare_values (id_temp, t->id_value, t->id_var->width) != 0)
1768 char expected_str [MAX_FORMATTED_LEN + 1];
1769 char actual_str [MAX_FORMATTED_LEN + 1];
1771 data_out (expected_str, &t->id_var->print, t->id_value);
1772 expected_str[t->id_var->print.w] = '\0';
1774 data_out (actual_str, &t->id_var->print, id_temp);
1775 actual_str[t->id_var->print.w] = '\0';
1778 _("Encountered mismatched record ID \"%s\" expecting \"%s\"."),
1779 actual_str, expected_str);
1785 /* Iterate over the set of expected occurrences and record each of
1786 them as a separate case. FIXME: We need to execute any
1787 transformations that follow the current one. */
1791 for (occurrences = 0; occurrences < info->max_occurs; )
1793 if (cur + info->ofs > info->end + 1)
1798 struct dls_var_spec *var_spec = t->first;
1800 for (; var_spec; var_spec = var_spec->next)
1802 int fc = var_spec->fc - 1 + cur;
1803 int lc = var_spec->lc - 1 + cur;
1805 if (fc > info->len && !warned && var_spec->input.type != FMT_A)
1810 _("Variable %s starting in column %d extends "
1811 "beyond physical record length of %d."),
1812 var_spec->v->name, fc, info->len);
1818 data_in_finite_line (&di, info->line, info->len, fc, lc);
1819 di.v = &info->c->data[var_spec->fv];
1822 di.format = var_spec->input;
1832 if (!t->write_case (t->wc_data))
1840 /* Reads one set of repetitions of the elements in the REPEATING
1841 DATA structure. Returns -1 on success, -2 on end of file or
1844 repeating_data_trns_proc (struct trns_header *trns, struct ccase *c,
1845 int case_num UNUSED)
1847 struct repeating_data_trns *t = (struct repeating_data_trns *) trns;
1849 char *line; /* Current record. */
1850 int len; /* Length of current record. */
1852 int starts_beg; /* Starting column. */
1853 int starts_end; /* Ending column. */
1854 int occurs; /* Number of repetitions. */
1855 int length; /* Length of each occurrence. */
1856 int cont_beg; /* Starting column for continuation lines. */
1857 int cont_end; /* Ending column for continuation lines. */
1859 int occurs_left; /* Number of occurrences remaining. */
1861 int code; /* Return value from rpd_parse_record(). */
1863 int skip_first_record = 0;
1865 dfm_push (t->handle);
1867 /* Read the current record. */
1868 dfm_bkwd_record (t->handle, 1);
1869 line = dfm_get_record (t->handle, &len);
1872 dfm_fwd_record (t->handle);
1874 /* Calculate occurs, length. */
1875 occurs_left = occurs = realize_value (&t->occurs, c);
1878 tmsg (SE, RPD_ERR, _("Invalid value %d for OCCURS."), occurs);
1881 starts_beg = realize_value (&t->starts_beg, c);
1882 if (starts_beg <= 0)
1884 tmsg (SE, RPD_ERR, _("Beginning column for STARTS (%d) must be "
1889 starts_end = realize_value (&t->starts_end, c);
1890 if (starts_end < starts_beg)
1892 tmsg (SE, RPD_ERR, _("Ending column for STARTS (%d) is less than "
1893 "beginning column (%d)."),
1894 starts_end, starts_beg);
1895 skip_first_record = 1;
1897 length = realize_value (&t->length, c);
1900 tmsg (SE, RPD_ERR, _("Invalid value %d for LENGTH."), length);
1902 occurs = occurs_left = 1;
1904 cont_beg = realize_value (&t->cont_beg, c);
1907 tmsg (SE, RPD_ERR, _("Beginning column for CONTINUED (%d) must be "
1912 cont_end = realize_value (&t->cont_end, c);
1913 if (cont_end < cont_beg)
1915 tmsg (SE, RPD_ERR, _("Ending column for CONTINUED (%d) is less than "
1916 "beginning column (%d)."),
1917 cont_end, cont_beg);
1921 /* Parse the first record. */
1922 if (!skip_first_record)
1924 struct rpd_parse_info info;
1928 info.beg = starts_beg;
1929 info.end = starts_end;
1933 info.max_occurs = occurs_left;
1934 code = rpd_parse_record (&info);
1937 occurs_left -= code;
1939 else if (cont_beg == 0)
1942 /* Make sure, if some occurrences are left, that we have
1943 continuation records. */
1944 if (occurs_left > 0 && cont_beg == 0)
1947 _("Number of repetitions specified on OCCURS (%d) "
1948 "exceed number of repetitions available in "
1949 "space on STARTS (%d), and CONTINUED not specified."),
1950 occurs, (starts_end - starts_beg + 1) / length);
1954 /* Go on to additional records. */
1955 while (occurs_left != 0)
1957 struct rpd_parse_info info;
1959 assert (occurs_left >= 0);
1961 /* Read in another record. */
1962 line = dfm_get_record (t->handle, &len);
1966 _("Unexpected end of file with %d repetitions "
1967 "remaining out of %d."),
1968 occurs_left, occurs);
1971 dfm_fwd_record (t->handle);
1973 /* Parse this record. */
1977 info.beg = cont_beg;
1978 info.end = cont_end;
1982 info.max_occurs = occurs_left;
1983 code = rpd_parse_record (&info);;
1986 occurs_left -= code;
1989 dfm_pop (t->handle);
1991 /* FIXME: This is a kluge until we've implemented multiplexing of
1996 /* Frees a REPEATING DATA transformation. */
1998 repeating_data_trns_free (struct trns_header *rpd_)
2000 struct repeating_data_trns *rpd = (struct repeating_data_trns *) rpd_;
2002 destroy_dls_var_spec (rpd->first);
2003 fh_close_handle (rpd->handle);
2004 free (rpd->id_value);
2007 /* Lets repeating_data_trns_proc() know how to write the cases
2008 that it composes. Not elegant. */
2010 repeating_data_set_write_case (struct trns_header *trns,
2011 write_case_func *write_case,
2012 write_case_data wc_data)
2014 struct repeating_data_trns *t = (struct repeating_data_trns *) trns;
2016 assert (trns->proc == repeating_data_trns_proc);
2017 t->write_case = write_case;
2018 t->wc_data = wc_data;