1 /* PSPP - computes sample statistics.
2 Copyright (C) 1997-9, 2000 Free Software Foundation, Inc.
3 Written by Ben Pfaff <blp@gnu.org>.
5 This program is free software; you can redistribute it and/or
6 modify it under the terms of the GNU General Public License as
7 published by the Free Software Foundation; either version 2 of the
8 License, or (at your option) any later version.
10 This program is distributed in the hope that it will be useful, but
11 WITHOUT ANY WARRANTY; without even the implied warranty of
12 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
13 General Public License for more details.
15 You should have received a copy of the GNU General Public License
16 along with this program; if not, write to the Free Software
17 Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA
21 #include "data-list.h"
31 #include "debug-print.h"
34 #include "file-handle.h"
44 /* Utility function. */
46 /* FIXME: Either REPEATING DATA must be the last transformation, or we
47 must multiplex the transformations that follow (i.e., perform them
48 for every case that we produce from a repetition instance).
49 Currently we do neither. We should do one or the other. */
51 /* Describes how to parse one variable. */
54 struct dls_var_spec *next; /* Next specification in list. */
56 /* Both free and fixed formats. */
57 struct fmt_spec input; /* Input format of this field. */
58 struct variable *v; /* Associated variable. Used only in
59 parsing. Not safe later. */
60 int fv; /* First value in case. */
62 /* Fixed format only. */
63 int rec; /* Record number (1-based). */
64 int fc, lc; /* Column numbers in record. */
66 /* Free format only. */
67 char name[9]; /* Name of variable. */
70 /* Constants for DATA LIST type. */
71 /* Must match table in cmd_data_list(). */
79 /* DATA LIST private data structure. */
84 struct dls_var_spec *first, *last; /* Variable parsing specifications. */
85 struct file_handle *handle; /* Input file, never NULL. */
87 int type; /* A DLS_* constant. */
88 struct variable *end; /* Variable specified on END subcommand. */
89 int eof; /* End of file encountered. */
90 int nrec; /* Number of records. */
91 size_t case_size; /* Case size in bytes. */
92 char *delims; /* Delimiters if any; not null-terminated. */
93 size_t delim_cnt; /* Number of delimiter, or 0 for spaces. */
96 static int parse_fixed (struct data_list_pgm *);
97 static int parse_free (struct dls_var_spec **, struct dls_var_spec **);
98 static void dump_fixed_table (const struct dls_var_spec *specs,
99 const struct file_handle *handle, int nrec);
100 static void dump_free_table (const struct data_list_pgm *);
101 static void destroy_dls_var_spec (struct dls_var_spec *);
102 static trns_free_func data_list_trns_free;
103 static trns_proc_func data_list_trns_proc;
105 /* Message title for REPEATING DATA. */
106 #define RPD_ERR "REPEATING DATA: "
111 /* DATA LIST program under construction. */
112 struct data_list_pgm *dls;
114 /* 0=print no table, 1=print table. (TABLE subcommand.) */
117 if (!case_source_is_complex (vfm_source))
118 discard_variables ();
120 dls = xmalloc (sizeof *dls);
121 dls->handle = default_handle;
128 dls->first = dls->last = NULL;
132 if (lex_match_id ("FILE"))
135 dls->handle = fh_parse_file_handle ();
138 if (case_source_is_class (vfm_source, &file_type_source_class)
139 && dls->handle != default_handle)
141 msg (SE, _("DATA LIST may not use a different file from "
142 "that specified on its surrounding FILE TYPE."));
146 else if (lex_match_id ("RECORDS"))
150 if (!lex_force_int ())
152 dls->nrec = lex_integer ();
156 else if (lex_match_id ("END"))
160 msg (SE, _("The END subcommand may only be specified once."));
165 if (!lex_force_id ())
167 dls->end = dict_lookup_var (default_dict, tokid);
169 dls->end = dict_create_var_assert (default_dict, tokid, 0);
172 else if (token == T_ID)
174 if (lex_match_id ("NOTABLE"))
176 else if (lex_match_id ("TABLE"))
181 if (lex_match_id ("FIXED"))
183 else if (lex_match_id ("FREE"))
185 else if (lex_match_id ("LIST"))
195 msg (SE, _("Only one of FIXED, FREE, or LIST may "
201 if ((dls->type == DLS_FREE || dls->type == DLS_LIST)
204 while (!lex_match (')'))
208 if (lex_match_id ("TAB"))
210 else if (token == T_STRING && tokstr.length == 1)
211 delim = tokstr.string[0];
218 dls->delims = xrealloc (dls->delims, dls->delim_cnt + 1);
219 dls->delims[dls->delim_cnt++] = delim;
233 dls->case_size = dict_get_case_size (default_dict);
234 default_handle = dls->handle;
237 dls->type = DLS_FIXED;
241 if (dls->type == DLS_FREE)
247 if (dls->type == DLS_FIXED)
249 if (!parse_fixed (dls))
252 dump_fixed_table (dls->first, dls->handle, dls->nrec);
256 if (!parse_free (&dls->first, &dls->last))
259 dump_free_table (dls);
262 if (!dfm_open_for_reading (dls->handle))
265 if (vfm_source != NULL)
267 struct data_list_pgm *new_pgm;
269 dls->h.proc = data_list_trns_proc;
270 dls->h.free = data_list_trns_free;
272 new_pgm = xmalloc (sizeof *new_pgm);
273 memcpy (new_pgm, &dls, sizeof *new_pgm);
274 add_transformation (&new_pgm->h);
277 vfm_source = create_case_source (&data_list_source_class,
283 destroy_dls_var_spec (dls->first);
289 /* Adds SPEC to the linked list with head at FIRST and tail at
292 append_var_spec (struct dls_var_spec **first, struct dls_var_spec **last,
293 struct dls_var_spec *spec)
300 (*last)->next = spec;
304 /* Fixed-format parsing. */
306 /* Used for chaining together fortran-like format specifiers. */
309 struct fmt_list *next;
312 struct fmt_list *down;
315 /* State of parsing DATA LIST. */
316 struct fixed_parsing_state
318 char **name; /* Variable names. */
319 int name_cnt; /* Number of names. */
321 int recno; /* Index of current record. */
322 int sc; /* 1-based column number of starting column for
323 next field to output. */
326 static int fixed_parse_compatible (struct fixed_parsing_state *,
327 struct dls_var_spec **,
328 struct dls_var_spec **);
329 static int fixed_parse_fortran (struct fixed_parsing_state *,
330 struct dls_var_spec **,
331 struct dls_var_spec **);
333 /* Parses all the variable specifications for DATA LIST FIXED,
334 storing them into DLS. Returns nonzero if successful. */
336 parse_fixed (struct data_list_pgm *dls)
338 struct fixed_parsing_state fx;
346 while (lex_match ('/'))
349 if (lex_integer_p ())
351 if (lex_integer () < fx.recno)
353 msg (SE, _("The record number specified, %ld, is "
354 "before the previous record, %d. Data "
355 "fields must be listed in order of "
356 "increasing record number."),
357 lex_integer (), fx.recno - 1);
361 fx.recno = lex_integer ();
367 if (!parse_DATA_LIST_vars (&fx.name, &fx.name_cnt, PV_NONE))
372 if (!fixed_parse_compatible (&fx, &dls->first, &dls->last))
375 else if (token == '(')
377 if (!fixed_parse_fortran (&fx, &dls->first, &dls->last))
382 msg (SE, _("SPSS-like or FORTRAN-like format "
383 "specification expected after variable names."));
387 for (i = 0; i < fx.name_cnt; i++)
391 if (dls->first == NULL)
393 msg (SE, _("At least one variable must be specified."));
396 if (dls->nrec && dls->last->rec > dls->nrec)
398 msg (SE, _("Variables are specified on records that "
399 "should not exist according to RECORDS subcommand."));
403 dls->nrec = dls->last->rec;
406 lex_error (_("expecting end of command"));
412 for (i = 0; i < fx.name_cnt; i++)
418 /* Parses a variable specification in the form 1-10 (A) based on
419 FX and adds specifications to the linked list with head at
420 FIRST and tail at LAST. */
422 fixed_parse_compatible (struct fixed_parsing_state *fx,
423 struct dls_var_spec **first, struct dls_var_spec **last)
425 struct fmt_spec input;
431 if (!lex_force_int ())
436 msg (SE, _("Column positions for fields must be positive."));
442 lex_negative_to_dash ();
445 if (!lex_force_int ())
450 msg (SE, _("Column positions for fields must be positive."));
455 msg (SE, _("The ending column for a field must be "
456 "greater than the starting column."));
465 /* Divide columns evenly. */
466 input.w = (lc - fc + 1) / fx->name_cnt;
467 if ((lc - fc + 1) % fx->name_cnt)
469 msg (SE, _("The %d columns %d-%d "
470 "can't be evenly divided into %d fields."),
471 lc - fc + 1, fc, lc, fx->name_cnt);
475 /* Format specifier. */
478 struct fmt_desc *fdp;
484 input.type = parse_format_specifier_name (&cp, 0);
485 if (input.type == -1)
489 msg (SE, _("A format specifier on this line "
490 "has extra characters on the end."));
500 if (lex_integer_p ())
502 if (lex_integer () < 1)
504 msg (SE, _("The value for number of decimal places "
505 "must be at least 1."));
509 input.d = lex_integer ();
515 fdp = &formats[input.type];
516 if (fdp->n_args < 2 && input.d)
518 msg (SE, _("Input format %s doesn't accept decimal places."),
526 if (!lex_force_match (')'))
534 if (!check_input_specifier (&input))
537 /* Start column for next specification. */
540 /* Width of variables to create. */
541 if (input.type == FMT_A || input.type == FMT_AHEX)
546 /* Create variables and var specs. */
547 for (i = 0; i < fx->name_cnt; i++)
549 struct dls_var_spec *spec;
552 v = dict_create_var (default_dict, fx->name[i], width);
555 convert_fmt_ItoO (&input, &v->print);
557 if (!case_source_is_complex (vfm_source))
562 v = dict_lookup_var_assert (default_dict, fx->name[i]);
563 if (vfm_source == NULL)
565 msg (SE, _("%s is a duplicate variable name."), fx->name[i]);
568 if ((width != 0) != (v->width != 0))
570 msg (SE, _("There is already a variable %s of a "
575 if (width != 0 && width != v->width)
577 msg (SE, _("There is already a string variable %s of a "
578 "different width."), fx->name[i]);
583 spec = xmalloc (sizeof *spec);
587 spec->rec = fx->recno;
588 spec->fc = fc + input.w * i;
589 spec->lc = spec->fc + input.w - 1;
590 append_var_spec (first, last, spec);
595 /* Destroy format list F and, if RECURSE is nonzero, all its
598 destroy_fmt_list (struct fmt_list *f, int recurse)
600 struct fmt_list *next;
605 if (recurse && f->f.type == FMT_DESCEND)
606 destroy_fmt_list (f->down, 1);
611 /* Takes a hierarchically structured fmt_list F as constructed by
612 fixed_parse_fortran(), and flattens it, adding the variable
613 specifications to the linked list with head FIRST and tail
614 LAST. NAME_IDX is used to take values from the list of names
615 in FX; it should initially point to a value of 0. */
617 dump_fmt_list (struct fixed_parsing_state *fx, struct fmt_list *f,
618 struct dls_var_spec **first, struct dls_var_spec **last,
623 for (; f; f = f->next)
624 if (f->f.type == FMT_X)
626 else if (f->f.type == FMT_T)
628 else if (f->f.type == FMT_NEWREC)
630 fx->recno += f->count;
634 for (i = 0; i < f->count; i++)
635 if (f->f.type == FMT_DESCEND)
637 if (!dump_fmt_list (fx, f->down, first, last, name_idx))
642 struct dls_var_spec *spec;
646 if (formats[f->f.type].cat & FCAT_STRING)
650 if (*name_idx >= fx->name_cnt)
652 msg (SE, _("The number of format "
653 "specifications exceeds the given number of "
658 v = dict_create_var (default_dict, fx->name[(*name_idx)++], width);
661 msg (SE, _("%s is a duplicate variable name."), fx->name[i]);
665 if (!case_source_is_complex (vfm_source))
668 spec = xmalloc (sizeof *spec);
672 spec->rec = fx->recno;
674 spec->lc = fx->sc + f->f.w - 1;
675 append_var_spec (first, last, spec);
677 convert_fmt_ItoO (&spec->input, &v->print);
685 /* Recursively parses a FORTRAN-like format specification into
686 the linked list with head FIRST and tail TAIL. LEVEL is the
687 level of recursion, starting from 0. Returns the parsed
688 specification if successful, or a null pointer on failure. */
689 static struct fmt_list *
690 fixed_parse_fortran_internal (struct fixed_parsing_state *fx,
691 struct dls_var_spec **first,
692 struct dls_var_spec **last)
694 struct fmt_list *head = NULL;
695 struct fmt_list *tail = NULL;
697 lex_force_match ('(');
701 struct fmt_list *new = xmalloc (sizeof *new);
704 /* Append new to list. */
712 if (lex_integer_p ())
714 new->count = lex_integer ();
720 /* Parse format specifier. */
723 new->f.type = FMT_DESCEND;
724 new->down = fixed_parse_fortran_internal (fx, first, last);
725 if (new->down == NULL)
728 else if (lex_match ('/'))
729 new->f.type = FMT_NEWREC;
730 else if (!parse_format_specifier (&new->f, 1)
731 || !check_input_specifier (&new->f))
736 lex_force_match (')');
741 destroy_fmt_list (head, 0);
746 /* Parses a FORTRAN-like format specification into the linked
747 list with head FIRST and tail LAST. Returns nonzero if
750 fixed_parse_fortran (struct fixed_parsing_state *fx,
751 struct dls_var_spec **first, struct dls_var_spec **last)
753 struct fmt_list *list;
756 list = fixed_parse_fortran_internal (fx, first, last);
761 dump_fmt_list (fx, list, first, last, &name_idx);
762 destroy_fmt_list (list, 1);
763 if (name_idx < fx->name_cnt)
765 msg (SE, _("There aren't enough format specifications "
766 "to match the number of variable names given."));
773 /* Displays a table giving information on fixed-format variable
774 parsing on DATA LIST. */
775 /* FIXME: The `Columns' column should be divided into three columns,
776 one for the starting column, one for the dash, one for the ending
777 column; then right-justify the starting column and left-justify the
780 dump_fixed_table (const struct dls_var_spec *specs,
781 const struct file_handle *handle, int nrec)
783 const struct dls_var_spec *spec;
786 const char *filename;
789 for (i = 0, spec = specs; spec; spec = spec->next)
791 t = tab_create (4, i + 1, 0);
792 tab_columns (t, TAB_COL_DOWN, 1);
793 tab_headers (t, 0, 0, 1, 0);
794 tab_text (t, 0, 0, TAB_CENTER | TAT_TITLE, _("Variable"));
795 tab_text (t, 1, 0, TAB_CENTER | TAT_TITLE, _("Record"));
796 tab_text (t, 2, 0, TAB_CENTER | TAT_TITLE, _("Columns"));
797 tab_text (t, 3, 0, TAB_CENTER | TAT_TITLE, _("Format"));
798 tab_box (t, TAL_1, TAL_1, TAL_0, TAL_1, 0, 0, 3, i);
799 tab_hline (t, TAL_2, 0, 3, 1);
800 tab_dim (t, tab_natural_dimensions);
802 for (i = 1, spec = specs; spec; spec = spec->next, i++)
804 tab_text (t, 0, i, TAB_LEFT, spec->v->name);
805 tab_text (t, 1, i, TAT_PRINTF, "%d", spec->rec);
806 tab_text (t, 2, i, TAT_PRINTF, "%3d-%3d",
808 tab_text (t, 3, i, TAB_LEFT | TAT_FIX,
809 fmt_to_string (&spec->input));
812 filename = handle_get_filename (handle);
813 if (filename == NULL)
815 buf = local_alloc (strlen (filename) + INT_DIGITS + 80);
816 sprintf (buf, (handle != inline_file
817 ? ngettext ("Reading %d record from file %s.",
818 "Reading %d records from file %s.", nrec)
819 : ngettext ("Reading %d record from the command file.",
820 "Reading %d records from the command file.",
824 tab_title (t, 0, buf);
829 /* Free-format parsing. */
831 /* Parses variable specifications for DATA LIST FREE and adds
832 them to the linked list with head FIRST and tail LAST.
833 Returns nonzero only if successful. */
835 parse_free (struct dls_var_spec **first, struct dls_var_spec **last)
840 struct fmt_spec input, output;
846 if (!parse_DATA_LIST_vars (&name, &name_cnt, PV_NONE))
850 if (!parse_format_specifier (&input, 0)
851 || !check_input_specifier (&input)
852 || !lex_force_match (')'))
854 for (i = 0; i < name_cnt; i++)
859 convert_fmt_ItoO (&input, &output);
867 output = get_format();
870 if (input.type == FMT_A || input.type == FMT_AHEX)
874 for (i = 0; i < name_cnt; i++)
876 struct dls_var_spec *spec;
879 v = dict_create_var (default_dict, name[i], width);
882 msg (SE, _("%s is a duplicate variable name."), name[i]);
885 v->print = v->write = output;
887 if (!case_source_is_complex (vfm_source))
890 spec = xmalloc (sizeof *spec);
894 strcpy (spec->name, name[i]);
895 append_var_spec (first, last, spec);
897 for (i = 0; i < name_cnt; i++)
903 lex_error (_("expecting end of command"));
907 /* Displays a table giving information on free-format variable parsing
910 dump_free_table (const struct data_list_pgm *dls)
916 struct dls_var_spec *spec;
917 for (i = 0, spec = dls->first; spec; spec = spec->next)
921 t = tab_create (2, i + 1, 0);
922 tab_columns (t, TAB_COL_DOWN, 1);
923 tab_headers (t, 0, 0, 1, 0);
924 tab_text (t, 0, 0, TAB_CENTER | TAT_TITLE, _("Variable"));
925 tab_text (t, 1, 0, TAB_CENTER | TAT_TITLE, _("Format"));
926 tab_box (t, TAL_1, TAL_1, TAL_0, TAL_1, 0, 0, 1, i);
927 tab_hline (t, TAL_2, 0, 1, 1);
928 tab_dim (t, tab_natural_dimensions);
931 struct dls_var_spec *spec;
933 for (i = 1, spec = dls->first; spec; spec = spec->next, i++)
935 tab_text (t, 0, i, TAB_LEFT, spec->v->name);
936 tab_text (t, 1, i, TAB_LEFT | TAT_FIX, fmt_to_string (&spec->input));
941 const char *filename;
943 filename = handle_get_filename (dls->handle);
944 if (filename == NULL)
947 (dls->handle != inline_file
948 ? _("Reading free-form data from file %s.")
949 : _("Reading free-form data from the command file.")),
956 /* Input procedure. */
958 /* Extracts a field from the current position in the current
959 record. Fields can be unquoted or quoted with single- or
960 double-quote characters. *FIELD is set to the field content.
961 After parsing the field, sets the current position in the
962 record to just past the field and any trailing delimiter.
963 END_BLANK is used internally; it should be initialized by the
964 caller to 0 and left alone afterward. Returns 0 on failure or
965 a 1-based column number indicating the beginning of the field
968 cut_field (const struct data_list_pgm *dls, struct len_string *field,
971 struct len_string line;
975 if (dfm_eof (dls->handle))
977 if (dls->delim_cnt == 0)
978 dfm_expand_tabs (dls->handle);
979 dfm_get_record (dls->handle, &line);
981 cp = ls_c_str (&line);
982 if (dls->delim_cnt == 0)
984 /* Skip leading whitespace. */
985 while (cp < ls_end (&line) && isspace ((unsigned char) *cp))
987 if (cp >= ls_end (&line))
990 /* Handle actual data, whether quoted or unquoted. */
991 if (*cp == '\'' || *cp == '"')
995 field->string = ++cp;
996 while (cp < ls_end (&line) && *cp != quote)
998 field->length = cp - field->string;
999 if (cp < ls_end (&line))
1002 msg (SW, _("Quoted string missing terminating `%c'."), quote);
1007 while (cp < ls_end (&line)
1008 && !isspace ((unsigned char) *cp) && *cp != ',')
1010 field->length = cp - field->string;
1013 /* Skip trailing whitespace and a single comma if present. */
1014 while (cp < ls_end (&line) && isspace ((unsigned char) *cp))
1016 if (cp < ls_end (&line) && *cp == ',')
1021 if (cp >= ls_end (&line))
1023 int column = dfm_column_start (dls->handle);
1024 /* A blank line or a line that ends in \t has a
1025 trailing blank field. */
1026 if (column == 1 || (column > 1 && cp[-1] == '\t'))
1028 if (*end_blank == 0)
1031 field->string = ls_end (&line);
1033 dfm_forward_record (dls->handle);
1048 while (cp < ls_end (&line)
1049 && memchr (dls->delims, *cp, dls->delim_cnt) == NULL)
1051 field->length = cp - field->string;
1052 if (cp < ls_end (&line))
1057 dfm_forward_columns (dls->handle, field->string - line.string);
1058 column_start = dfm_column_start (dls->handle);
1060 dfm_forward_columns (dls->handle, cp - field->string);
1062 return column_start;
1065 typedef int data_list_read_func (const struct data_list_pgm *, struct ccase *);
1066 static data_list_read_func read_from_data_list_fixed;
1067 static data_list_read_func read_from_data_list_free;
1068 static data_list_read_func read_from_data_list_list;
1070 /* Returns the proper function to read the kind of DATA LIST
1071 data specified by DLS. */
1072 static data_list_read_func *
1073 get_data_list_read_func (const struct data_list_pgm *dls)
1078 return read_from_data_list_fixed;
1081 return read_from_data_list_free;
1084 return read_from_data_list_list;
1092 /* Reads a case from the data file into C, parsing it according
1093 to fixed-format syntax rules in DLS. Returns -1 on success,
1094 -2 at end of file. */
1096 read_from_data_list_fixed (const struct data_list_pgm *dls,
1099 struct dls_var_spec *var_spec = dls->first;
1102 if (dfm_eof (dls->handle))
1104 for (i = 1; i <= dls->nrec; i++)
1106 struct len_string line;
1108 if (dfm_eof (dls->handle))
1110 /* Note that this can't occur on the first record. */
1111 msg (SW, _("Partial case of %d of %d records discarded."),
1115 dfm_expand_tabs (dls->handle);
1116 dfm_get_record (dls->handle, &line);
1118 for (; var_spec && i == var_spec->rec; var_spec = var_spec->next)
1122 data_in_finite_line (&di, ls_c_str (&line), ls_length (&line),
1123 var_spec->fc, var_spec->lc);
1124 di.v = case_data_rw (c, var_spec->fv);
1126 di.f1 = var_spec->fc;
1127 di.format = var_spec->input;
1132 dfm_forward_record (dls->handle);
1138 /* Reads a case from the data file into C, parsing it according
1139 to free-format syntax rules in DLS. Returns -1 on success,
1140 -2 at end of file. */
1142 read_from_data_list_free (const struct data_list_pgm *dls,
1145 struct dls_var_spec *var_spec;
1148 for (var_spec = dls->first; var_spec; var_spec = var_spec->next)
1150 struct len_string field;
1153 /* Cut out a field and read in a new record if necessary. */
1156 column = cut_field (dls, &field, &end_blank);
1160 if (!dfm_eof (dls->handle))
1161 dfm_forward_record (dls->handle);
1162 if (dfm_eof (dls->handle))
1164 if (var_spec != dls->first)
1165 msg (SW, _("Partial case discarded. The first variable "
1166 "missing was %s."), var_spec->name);
1174 di.s = ls_c_str (&field);
1175 di.e = ls_end (&field);
1176 di.v = case_data_rw (c, var_spec->fv);
1179 di.format = var_spec->input;
1186 /* Reads a case from the data file and parses it according to
1187 list-format syntax rules. Returns -1 on success, -2 at end of
1190 read_from_data_list_list (const struct data_list_pgm *dls,
1193 struct dls_var_spec *var_spec;
1196 if (dfm_eof (dls->handle))
1199 for (var_spec = dls->first; var_spec; var_spec = var_spec->next)
1201 struct len_string field;
1204 /* Cut out a field and check for end-of-line. */
1205 column = cut_field (dls, &field, &end_blank);
1208 if (get_undefined ())
1209 msg (SW, _("Missing value(s) for all variables from %s onward. "
1210 "These will be filled with the system-missing value "
1211 "or blanks, as appropriate."),
1213 for (; var_spec; var_spec = var_spec->next)
1215 int width = get_format_var_width (&var_spec->input);
1217 case_data_rw (c, var_spec->fv)->f = SYSMIS;
1219 memset (case_data_rw (c, var_spec->fv)->s, ' ', width);
1227 di.s = ls_c_str (&field);
1228 di.e = ls_end (&field);
1229 di.v = case_data_rw (c, var_spec->fv);
1232 di.format = var_spec->input;
1237 dfm_forward_record (dls->handle);
1241 /* Destroys SPEC. */
1243 destroy_dls_var_spec (struct dls_var_spec *spec)
1245 struct dls_var_spec *next;
1247 while (spec != NULL)
1255 /* Destroys DATA LIST transformation PGM. */
1257 data_list_trns_free (struct trns_header *pgm)
1259 struct data_list_pgm *dls = (struct data_list_pgm *) pgm;
1261 destroy_dls_var_spec (dls->first);
1262 fh_close_handle (dls->handle);
1266 /* Handle DATA LIST transformation T, parsing data into C. */
1268 data_list_trns_proc (struct trns_header *t, struct ccase *c,
1269 int case_num UNUSED)
1271 struct data_list_pgm *dls = (struct data_list_pgm *) t;
1272 data_list_read_func *read_func;
1275 dfm_push (dls->handle);
1277 read_func = get_data_list_read_func (dls);
1278 retval = read_func (dls, c);
1280 /* Handle end of file. */
1283 /* If we already encountered end of file then this is an
1287 msg (SE, _("Attempt to read past end of file."));
1289 dfm_pop (dls->handle);
1293 /* Otherwise simply note it. */
1299 /* If there was an END subcommand handle it. */
1300 if (dls->end != NULL)
1304 case_data_rw (c, dls->end->fv)->f = 1.0;
1308 case_data_rw (c, dls->end->fv)->f = 0.0;
1311 dfm_pop (dls->handle);
1316 /* Reads all the records from the data file and passes them to
1319 data_list_source_read (struct case_source *source,
1321 write_case_func *write_case, write_case_data wc_data)
1323 struct data_list_pgm *dls = source->aux;
1324 data_list_read_func *read_func = get_data_list_read_func (dls);
1326 dfm_push (dls->handle);
1327 while (read_func (dls, c) != -2)
1328 if (!write_case (wc_data))
1330 dfm_pop (dls->handle);
1332 fh_close_handle (dls->handle);
1335 /* Destroys the source's internal data. */
1337 data_list_source_destroy (struct case_source *source)
1339 data_list_trns_free (source->aux);
1342 const struct case_source_class data_list_source_class =
1346 data_list_source_read,
1347 data_list_source_destroy,
1350 /* REPEATING DATA. */
1352 /* Represents a number or a variable. */
1353 struct rpd_num_or_var
1355 int num; /* Value, or 0. */
1356 struct variable *var; /* Variable, if number==0. */
1359 /* REPEATING DATA private data structure. */
1360 struct repeating_data_trns
1362 struct trns_header h;
1363 struct dls_var_spec *first, *last; /* Variable parsing specifications. */
1364 struct file_handle *handle; /* Input file, never NULL. */
1366 struct rpd_num_or_var starts_beg; /* STARTS=, before the dash. */
1367 struct rpd_num_or_var starts_end; /* STARTS=, after the dash. */
1368 struct rpd_num_or_var occurs; /* OCCURS= subcommand. */
1369 struct rpd_num_or_var length; /* LENGTH= subcommand. */
1370 struct rpd_num_or_var cont_beg; /* CONTINUED=, before the dash. */
1371 struct rpd_num_or_var cont_end; /* CONTINUED=, after the dash. */
1373 /* ID subcommand. */
1374 int id_beg, id_end; /* Beginning & end columns. */
1375 struct variable *id_var; /* DATA LIST variable. */
1376 struct fmt_spec id_spec; /* Input format spec. */
1377 union value *id_value; /* ID value. */
1379 write_case_func *write_case;
1380 write_case_data wc_data;
1383 static trns_free_func repeating_data_trns_free;
1384 static int parse_num_or_var (struct rpd_num_or_var *, const char *);
1385 static int parse_repeating_data (struct dls_var_spec **,
1386 struct dls_var_spec **);
1387 static void find_variable_input_spec (struct variable *v,
1388 struct fmt_spec *spec);
1390 /* Parses the REPEATING DATA command. */
1392 cmd_repeating_data (void)
1394 struct repeating_data_trns *rpd;
1396 /* 0=print no table, 1=print table. (TABLE subcommand.) */
1399 /* Bits are set when a particular subcommand has been seen. */
1402 assert (case_source_is_complex (vfm_source));
1404 rpd = xmalloc (sizeof *rpd);
1405 rpd->handle = default_handle;
1406 rpd->first = rpd->last = NULL;
1407 rpd->starts_beg.num = 0;
1408 rpd->starts_beg.var = NULL;
1409 rpd->starts_end = rpd->occurs = rpd->length = rpd->cont_beg
1410 = rpd->cont_end = rpd->starts_beg;
1411 rpd->id_beg = rpd->id_end = 0;
1413 rpd->id_value = NULL;
1419 if (lex_match_id ("FILE"))
1422 rpd->handle = fh_parse_file_handle ();
1425 if (rpd->handle != default_handle)
1427 msg (SE, _("REPEATING DATA must use the same file as its "
1428 "corresponding DATA LIST or FILE TYPE."));
1432 else if (lex_match_id ("STARTS"))
1437 msg (SE, _("%s subcommand given multiple times."),"STARTS");
1442 if (!parse_num_or_var (&rpd->starts_beg, "STARTS beginning column"))
1445 lex_negative_to_dash ();
1446 if (lex_match ('-'))
1448 if (!parse_num_or_var (&rpd->starts_end, "STARTS ending column"))
1451 /* Otherwise, rpd->starts_end is left uninitialized.
1452 This is okay. We will initialize it later from the
1453 record length of the file. We can't do this now
1454 because we can't be sure that the user has specified
1455 the file handle yet. */
1458 if (rpd->starts_beg.num != 0 && rpd->starts_end.num != 0
1459 && rpd->starts_beg.num > rpd->starts_end.num)
1461 msg (SE, _("STARTS beginning column (%d) exceeds "
1462 "STARTS ending column (%d)."),
1463 rpd->starts_beg.num, rpd->starts_end.num);
1467 else if (lex_match_id ("OCCURS"))
1472 msg (SE, _("%s subcommand given multiple times."),"OCCURS");
1477 if (!parse_num_or_var (&rpd->occurs, "OCCURS"))
1480 else if (lex_match_id ("LENGTH"))
1485 msg (SE, _("%s subcommand given multiple times."),"LENGTH");
1490 if (!parse_num_or_var (&rpd->length, "LENGTH"))
1493 else if (lex_match_id ("CONTINUED"))
1498 msg (SE, _("%s subcommand given multiple times."),"CONTINUED");
1503 if (!lex_match ('/'))
1505 if (!parse_num_or_var (&rpd->cont_beg, "CONTINUED beginning column"))
1508 lex_negative_to_dash ();
1510 && !parse_num_or_var (&rpd->cont_end,
1511 "CONTINUED ending column"))
1514 if (rpd->cont_beg.num != 0 && rpd->cont_end.num != 0
1515 && rpd->cont_beg.num > rpd->cont_end.num)
1517 msg (SE, _("CONTINUED beginning column (%d) exceeds "
1518 "CONTINUED ending column (%d)."),
1519 rpd->cont_beg.num, rpd->cont_end.num);
1524 rpd->cont_beg.num = 1;
1526 else if (lex_match_id ("ID"))
1531 msg (SE, _("%s subcommand given multiple times."),"ID");
1536 if (!lex_force_int ())
1538 if (lex_integer () < 1)
1540 msg (SE, _("ID beginning column (%ld) must be positive."),
1544 rpd->id_beg = lex_integer ();
1547 lex_negative_to_dash ();
1549 if (lex_match ('-'))
1551 if (!lex_force_int ())
1553 if (lex_integer () < 1)
1555 msg (SE, _("ID ending column (%ld) must be positive."),
1559 if (lex_integer () < rpd->id_end)
1561 msg (SE, _("ID ending column (%ld) cannot be less than "
1562 "ID beginning column (%d)."),
1563 lex_integer (), rpd->id_beg);
1567 rpd->id_end = lex_integer ();
1570 else rpd->id_end = rpd->id_beg;
1572 if (!lex_force_match ('='))
1574 rpd->id_var = parse_variable ();
1575 if (rpd->id_var == NULL)
1578 find_variable_input_spec (rpd->id_var, &rpd->id_spec);
1579 rpd->id_value = xmalloc (sizeof *rpd->id_value * rpd->id_var->nv);
1581 else if (lex_match_id ("TABLE"))
1583 else if (lex_match_id ("NOTABLE"))
1585 else if (lex_match_id ("DATA"))
1593 if (!lex_force_match ('/'))
1597 /* Comes here when DATA specification encountered. */
1598 if ((seen & (1 | 2)) != (1 | 2))
1600 if ((seen & 1) == 0)
1601 msg (SE, _("Missing required specification STARTS."));
1602 if ((seen & 2) == 0)
1603 msg (SE, _("Missing required specification OCCURS."));
1607 /* Enforce ID restriction. */
1608 if ((seen & 16) && !(seen & 8))
1610 msg (SE, _("ID specified without CONTINUED."));
1614 /* Calculate starts_end, cont_end if necessary. */
1615 if (rpd->starts_end.num == 0 && rpd->starts_end.var == NULL)
1616 rpd->starts_end.num = handle_get_record_width (rpd->handle);
1617 if (rpd->cont_end.num == 0 && rpd->starts_end.var == NULL)
1618 rpd->cont_end.num = handle_get_record_width (rpd->handle);
1620 /* Calculate length if possible. */
1621 if ((seen & 4) == 0)
1623 struct dls_var_spec *iter;
1625 for (iter = rpd->first; iter; iter = iter->next)
1627 if (iter->lc > rpd->length.num)
1628 rpd->length.num = iter->lc;
1630 assert (rpd->length.num != 0);
1634 if (!parse_repeating_data (&rpd->first, &rpd->last))
1638 dump_fixed_table (rpd->first, rpd->handle, rpd->last->rec);
1641 struct repeating_data_trns *new_trns;
1643 rpd->h.proc = repeating_data_trns_proc;
1644 rpd->h.free = repeating_data_trns_free;
1646 new_trns = xmalloc (sizeof *new_trns);
1647 memcpy (new_trns, &rpd, sizeof *new_trns);
1648 add_transformation ((struct trns_header *) new_trns);
1651 return lex_end_of_command ();
1654 destroy_dls_var_spec (rpd->first);
1655 free (rpd->id_value);
1659 /* Finds the input format specification for variable V and puts
1660 it in SPEC. Because of the way that DATA LIST is structured,
1661 this is nontrivial. */
1663 find_variable_input_spec (struct variable *v, struct fmt_spec *spec)
1667 for (i = 0; i < n_trns; i++)
1669 struct data_list_pgm *pgm = (struct data_list_pgm *) t_trns[i];
1671 if (pgm->h.proc == data_list_trns_proc)
1673 struct dls_var_spec *iter;
1675 for (iter = pgm->first; iter; iter = iter->next)
1678 *spec = iter->input;
1687 /* Parses a number or a variable name from the syntax file and puts
1688 the results in VALUE. Ensures that the number is at least 1; else
1689 emits an error based on MESSAGE. Returns nonzero only if
1692 parse_num_or_var (struct rpd_num_or_var *value, const char *message)
1697 value->var = parse_variable ();
1698 if (value->var == NULL)
1700 if (value->var->type == ALPHA)
1702 msg (SE, _("String variable not allowed here."));
1706 else if (lex_integer_p ())
1708 value->num = lex_integer ();
1712 msg (SE, _("%s (%d) must be at least 1."), message, value->num);
1718 msg (SE, _("Variable or integer expected for %s."), message);
1724 /* Parses data specifications for repeating data groups, adding
1725 them to the linked list with head FIRST and tail LAST.
1726 Returns nonzero only if successful. */
1728 parse_repeating_data (struct dls_var_spec **first, struct dls_var_spec **last)
1730 struct fixed_parsing_state fx;
1736 while (token != '.')
1738 if (!parse_DATA_LIST_vars (&fx.name, &fx.name_cnt, PV_NONE))
1743 if (!fixed_parse_compatible (&fx, first, last))
1746 else if (token == '(')
1748 if (!fixed_parse_fortran (&fx, first, last))
1753 msg (SE, _("SPSS-like or FORTRAN-like format "
1754 "specification expected after variable names."));
1758 for (i = 0; i < fx.name_cnt; i++)
1764 lex_error (_("expecting end of command"));
1771 for (i = 0; i < fx.name_cnt; i++)
1777 /* Obtains the real value for rpd_num_or_var N in case C and returns
1778 it. The valid range is nonnegative numbers, but numbers outside
1779 this range can be returned and should be handled by the caller as
1782 realize_value (struct rpd_num_or_var *n, struct ccase *c)
1787 assert (n->num == 0);
1790 double v = case_num (c, n->var->fv);
1792 if (v == SYSMIS || v <= INT_MIN || v >= INT_MAX)
1801 /* Parameter record passed to rpd_parse_record(). */
1802 struct rpd_parse_info
1804 struct repeating_data_trns *trns; /* REPEATING DATA transformation. */
1805 const char *line; /* Line being parsed. */
1806 size_t len; /* Line length. */
1807 int beg, end; /* First and last column of first occurrence. */
1808 int ofs; /* Column offset between repeated occurrences. */
1809 struct ccase *c; /* Case to fill in. */
1810 int verify_id; /* Zero to initialize ID, nonzero to verify it. */
1811 int max_occurs; /* Max number of occurrences to parse. */
1814 /* Parses one record of repeated data and outputs corresponding
1815 cases. Returns number of occurrences parsed up to the
1816 maximum specified in INFO. */
1818 rpd_parse_record (const struct rpd_parse_info *info)
1820 struct repeating_data_trns *t = info->trns;
1821 int cur = info->beg;
1824 /* Handle record ID values. */
1827 union value id_temp[MAX_ELEMS_PER_VALUE];
1829 /* Parse record ID into V. */
1833 data_in_finite_line (&di, info->line, info->len, t->id_beg, t->id_end);
1834 di.v = info->verify_id ? id_temp : t->id_value;
1837 di.format = t->id_spec;
1844 && compare_values (id_temp, t->id_value, t->id_var->width) != 0)
1846 char expected_str [MAX_FORMATTED_LEN + 1];
1847 char actual_str [MAX_FORMATTED_LEN + 1];
1849 data_out (expected_str, &t->id_var->print, t->id_value);
1850 expected_str[t->id_var->print.w] = '\0';
1852 data_out (actual_str, &t->id_var->print, id_temp);
1853 actual_str[t->id_var->print.w] = '\0';
1856 _("Encountered mismatched record ID \"%s\" expecting \"%s\"."),
1857 actual_str, expected_str);
1863 /* Iterate over the set of expected occurrences and record each of
1864 them as a separate case. FIXME: We need to execute any
1865 transformations that follow the current one. */
1869 for (occurrences = 0; occurrences < info->max_occurs; )
1871 if (cur + info->ofs > info->end + 1)
1876 struct dls_var_spec *var_spec = t->first;
1878 for (; var_spec; var_spec = var_spec->next)
1880 int fc = var_spec->fc - 1 + cur;
1881 int lc = var_spec->lc - 1 + cur;
1883 if (fc > info->len && !warned && var_spec->input.type != FMT_A)
1888 _("Variable %s starting in column %d extends "
1889 "beyond physical record length of %d."),
1890 var_spec->v->name, fc, info->len);
1896 data_in_finite_line (&di, info->line, info->len, fc, lc);
1897 di.v = case_data_rw (info->c, var_spec->fv);
1900 di.format = var_spec->input;
1910 if (!t->write_case (t->wc_data))
1918 /* Reads one set of repetitions of the elements in the REPEATING
1919 DATA structure. Returns -1 on success, -2 on end of file or
1922 repeating_data_trns_proc (struct trns_header *trns, struct ccase *c,
1923 int case_num UNUSED)
1925 struct repeating_data_trns *t = (struct repeating_data_trns *) trns;
1927 struct len_string line; /* Current record. */
1929 int starts_beg; /* Starting column. */
1930 int starts_end; /* Ending column. */
1931 int occurs; /* Number of repetitions. */
1932 int length; /* Length of each occurrence. */
1933 int cont_beg; /* Starting column for continuation lines. */
1934 int cont_end; /* Ending column for continuation lines. */
1936 int occurs_left; /* Number of occurrences remaining. */
1938 int code; /* Return value from rpd_parse_record(). */
1940 int skip_first_record = 0;
1942 dfm_push (t->handle);
1944 /* Read the current record. */
1945 dfm_reread_record (t->handle, 1);
1946 dfm_expand_tabs (t->handle);
1947 if (dfm_eof (t->handle))
1949 dfm_get_record (t->handle, &line);
1950 dfm_forward_record (t->handle);
1952 /* Calculate occurs, length. */
1953 occurs_left = occurs = realize_value (&t->occurs, c);
1956 tmsg (SE, RPD_ERR, _("Invalid value %d for OCCURS."), occurs);
1959 starts_beg = realize_value (&t->starts_beg, c);
1960 if (starts_beg <= 0)
1962 tmsg (SE, RPD_ERR, _("Beginning column for STARTS (%d) must be "
1967 starts_end = realize_value (&t->starts_end, c);
1968 if (starts_end < starts_beg)
1970 tmsg (SE, RPD_ERR, _("Ending column for STARTS (%d) is less than "
1971 "beginning column (%d)."),
1972 starts_end, starts_beg);
1973 skip_first_record = 1;
1975 length = realize_value (&t->length, c);
1978 tmsg (SE, RPD_ERR, _("Invalid value %d for LENGTH."), length);
1980 occurs = occurs_left = 1;
1982 cont_beg = realize_value (&t->cont_beg, c);
1985 tmsg (SE, RPD_ERR, _("Beginning column for CONTINUED (%d) must be "
1990 cont_end = realize_value (&t->cont_end, c);
1991 if (cont_end < cont_beg)
1993 tmsg (SE, RPD_ERR, _("Ending column for CONTINUED (%d) is less than "
1994 "beginning column (%d)."),
1995 cont_end, cont_beg);
1999 /* Parse the first record. */
2000 if (!skip_first_record)
2002 struct rpd_parse_info info;
2004 info.line = ls_c_str (&line);
2005 info.len = ls_length (&line);
2006 info.beg = starts_beg;
2007 info.end = starts_end;
2011 info.max_occurs = occurs_left;
2012 code = rpd_parse_record (&info);
2015 occurs_left -= code;
2017 else if (cont_beg == 0)
2020 /* Make sure, if some occurrences are left, that we have
2021 continuation records. */
2022 if (occurs_left > 0 && cont_beg == 0)
2025 _("Number of repetitions specified on OCCURS (%d) "
2026 "exceed number of repetitions available in "
2027 "space on STARTS (%d), and CONTINUED not specified."),
2028 occurs, (starts_end - starts_beg + 1) / length);
2032 /* Go on to additional records. */
2033 while (occurs_left != 0)
2035 struct rpd_parse_info info;
2037 assert (occurs_left >= 0);
2039 /* Read in another record. */
2040 if (dfm_eof (t->handle))
2043 _("Unexpected end of file with %d repetitions "
2044 "remaining out of %d."),
2045 occurs_left, occurs);
2048 dfm_expand_tabs (t->handle);
2049 dfm_get_record (t->handle, &line);
2050 dfm_forward_record (t->handle);
2052 /* Parse this record. */
2054 info.line = ls_c_str (&line);
2055 info.len = ls_length (&line);
2056 info.beg = cont_beg;
2057 info.end = cont_end;
2061 info.max_occurs = occurs_left;
2062 code = rpd_parse_record (&info);;
2065 occurs_left -= code;
2068 dfm_pop (t->handle);
2070 /* FIXME: This is a kluge until we've implemented multiplexing of
2075 /* Frees a REPEATING DATA transformation. */
2077 repeating_data_trns_free (struct trns_header *rpd_)
2079 struct repeating_data_trns *rpd = (struct repeating_data_trns *) rpd_;
2081 destroy_dls_var_spec (rpd->first);
2082 fh_close_handle (rpd->handle);
2083 free (rpd->id_value);
2086 /* Lets repeating_data_trns_proc() know how to write the cases
2087 that it composes. Not elegant. */
2089 repeating_data_set_write_case (struct trns_header *trns,
2090 write_case_func *write_case,
2091 write_case_data wc_data)
2093 struct repeating_data_trns *t = (struct repeating_data_trns *) trns;
2095 assert (trns->proc == repeating_data_trns_proc);
2096 t->write_case = write_case;
2097 t->wc_data = wc_data;