1 /* PSPP - computes sample statistics.
2 Copyright (C) 1997-9, 2000 Free Software Foundation, Inc.
3 Written by Ben Pfaff <blp@gnu.org>.
5 This program is free software; you can redistribute it and/or
6 modify it under the terms of the GNU General Public License as
7 published by the Free Software Foundation; either version 2 of the
8 License, or (at your option) any later version.
10 This program is distributed in the hope that it will be useful, but
11 WITHOUT ANY WARRANTY; without even the implied warranty of
12 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
13 General Public License for more details.
15 You should have received a copy of the GNU General Public License
16 along with this program; if not, write to the Free Software
17 Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA
21 #include "data-list.h"
31 #include "debug-print.h"
33 #include "dictionary.h"
35 #include "file-handle.h"
45 /* Utility function. */
47 /* FIXME: Either REPEATING DATA must be the last transformation, or we
48 must multiplex the transformations that follow (i.e., perform them
49 for every case that we produce from a repetition instance).
50 Currently we do neither. We should do one or the other. */
52 /* Describes how to parse one variable. */
55 struct dls_var_spec *next; /* Next specification in list. */
57 /* Both free and fixed formats. */
58 struct fmt_spec input; /* Input format of this field. */
59 struct variable *v; /* Associated variable. Used only in
60 parsing. Not safe later. */
61 int fv; /* First value in case. */
63 /* Fixed format only. */
64 int rec; /* Record number (1-based). */
65 int fc, lc; /* Column numbers in record. */
67 /* Free format only. */
68 char name[SHORT_NAME_LEN + 1]; /* Name of variable. */
71 /* Constants for DATA LIST type. */
72 /* Must match table in cmd_data_list(). */
80 /* DATA LIST private data structure. */
85 struct dls_var_spec *first, *last; /* Variable parsing specifications. */
86 struct dfm_reader *reader; /* Data file reader. */
88 int type; /* A DLS_* constant. */
89 struct variable *end; /* Variable specified on END subcommand. */
90 int eof; /* End of file encountered. */
91 int rec_cnt; /* Number of records. */
92 size_t case_size; /* Case size in bytes. */
93 char *delims; /* Delimiters if any; not null-terminated. */
94 size_t delim_cnt; /* Number of delimiter, or 0 for spaces. */
97 static int parse_fixed (struct data_list_pgm *);
98 static int parse_free (struct dls_var_spec **, struct dls_var_spec **);
99 static void dump_fixed_table (const struct dls_var_spec *,
100 const struct file_handle *, int rec_cnt);
101 static void dump_free_table (const struct data_list_pgm *,
102 const struct file_handle *);
103 static void destroy_dls_var_spec (struct dls_var_spec *);
104 static trns_free_func data_list_trns_free;
105 static trns_proc_func data_list_trns_proc;
107 /* Message title for REPEATING DATA. */
108 #define RPD_ERR "REPEATING DATA: "
113 struct data_list_pgm *dls; /* DATA LIST program under construction. */
114 int table = -1; /* Print table if nonzero, -1=undecided. */
115 struct file_handle *fh = NULL; /* File handle of source, NULL=inline file. */
117 if (!case_source_is_complex (vfm_source))
118 discard_variables ();
120 dls = xmalloc (sizeof *dls);
128 dls->first = dls->last = NULL;
132 if (lex_match_id ("FILE"))
138 if (case_source_is_class (vfm_source, &file_type_source_class)
139 && fh != default_handle)
141 msg (SE, _("DATA LIST may not use a different file from "
142 "that specified on its surrounding FILE TYPE."));
146 else if (lex_match_id ("RECORDS"))
150 if (!lex_force_int ())
152 dls->rec_cnt = lex_integer ();
156 else if (lex_match_id ("END"))
160 msg (SE, _("The END subcommand may only be specified once."));
165 if (!lex_force_id ())
167 dls->end = dict_lookup_var (default_dict, tokid);
169 dls->end = dict_create_var_assert (default_dict, tokid, 0);
172 else if (token == T_ID)
174 if (lex_match_id ("NOTABLE"))
176 else if (lex_match_id ("TABLE"))
181 if (lex_match_id ("FIXED"))
183 else if (lex_match_id ("FREE"))
185 else if (lex_match_id ("LIST"))
195 msg (SE, _("Only one of FIXED, FREE, or LIST may "
201 if ((dls->type == DLS_FREE || dls->type == DLS_LIST)
204 while (!lex_match (')'))
208 if (lex_match_id ("TAB"))
210 else if (token == T_STRING && tokstr.length == 1)
212 delim = tokstr.string[0];
221 dls->delims = xrealloc (dls->delims, dls->delim_cnt + 1);
222 dls->delims[dls->delim_cnt++] = delim;
236 dls->case_size = dict_get_case_size (default_dict);
240 dls->type = DLS_FIXED;
244 if (dls->type == DLS_FREE)
250 if (dls->type == DLS_FIXED)
252 if (!parse_fixed (dls))
255 dump_fixed_table (dls->first, fh, dls->rec_cnt);
259 if (!parse_free (&dls->first, &dls->last))
262 dump_free_table (dls, fh);
265 dls->reader = dfm_open_reader (fh);
266 if (dls->reader == NULL)
269 if (vfm_source != NULL)
271 struct data_list_pgm *new_pgm;
273 dls->h.proc = data_list_trns_proc;
274 dls->h.free = data_list_trns_free;
276 new_pgm = xmalloc (sizeof *new_pgm);
277 memcpy (new_pgm, &dls, sizeof *new_pgm);
278 add_transformation (&new_pgm->h);
281 vfm_source = create_case_source (&data_list_source_class, dls);
286 destroy_dls_var_spec (dls->first);
292 /* Adds SPEC to the linked list with head at FIRST and tail at
295 append_var_spec (struct dls_var_spec **first, struct dls_var_spec **last,
296 struct dls_var_spec *spec)
303 (*last)->next = spec;
307 /* Fixed-format parsing. */
309 /* Used for chaining together fortran-like format specifiers. */
312 struct fmt_list *next;
315 struct fmt_list *down;
318 /* State of parsing DATA LIST. */
319 struct fixed_parsing_state
321 char **name; /* Variable names. */
322 int name_cnt; /* Number of names. */
324 int recno; /* Index of current record. */
325 int sc; /* 1-based column number of starting column for
326 next field to output. */
329 static int fixed_parse_compatible (struct fixed_parsing_state *,
330 struct dls_var_spec **,
331 struct dls_var_spec **);
332 static int fixed_parse_fortran (struct fixed_parsing_state *,
333 struct dls_var_spec **,
334 struct dls_var_spec **);
336 /* Parses all the variable specifications for DATA LIST FIXED,
337 storing them into DLS. Returns nonzero if successful. */
339 parse_fixed (struct data_list_pgm *dls)
341 struct fixed_parsing_state fx;
349 while (lex_match ('/'))
352 if (lex_is_integer ())
354 if (lex_integer () < fx.recno)
356 msg (SE, _("The record number specified, %ld, is "
357 "before the previous record, %d. Data "
358 "fields must be listed in order of "
359 "increasing record number."),
360 lex_integer (), fx.recno - 1);
364 fx.recno = lex_integer ();
370 if (!parse_DATA_LIST_vars (&fx.name, &fx.name_cnt, PV_NONE))
373 if (lex_is_number ())
375 if (!fixed_parse_compatible (&fx, &dls->first, &dls->last))
378 else if (token == '(')
380 if (!fixed_parse_fortran (&fx, &dls->first, &dls->last))
385 msg (SE, _("SPSS-like or FORTRAN-like format "
386 "specification expected after variable names."));
390 for (i = 0; i < fx.name_cnt; i++)
394 if (dls->first == NULL)
396 msg (SE, _("At least one variable must be specified."));
399 if (dls->rec_cnt && dls->last->rec > dls->rec_cnt)
401 msg (SE, _("Variables are specified on records that "
402 "should not exist according to RECORDS subcommand."));
405 else if (!dls->rec_cnt)
406 dls->rec_cnt = dls->last->rec;
409 lex_error (_("expecting end of command"));
415 for (i = 0; i < fx.name_cnt; i++)
421 /* Parses a variable specification in the form 1-10 (A) based on
422 FX and adds specifications to the linked list with head at
423 FIRST and tail at LAST. */
425 fixed_parse_compatible (struct fixed_parsing_state *fx,
426 struct dls_var_spec **first, struct dls_var_spec **last)
428 struct fmt_spec input;
434 if (!lex_force_int ())
439 msg (SE, _("Column positions for fields must be positive."));
445 lex_negative_to_dash ();
448 if (!lex_force_int ())
453 msg (SE, _("Column positions for fields must be positive."));
458 msg (SE, _("The ending column for a field must be "
459 "greater than the starting column."));
468 /* Divide columns evenly. */
469 input.w = (lc - fc + 1) / fx->name_cnt;
470 if ((lc - fc + 1) % fx->name_cnt)
472 msg (SE, _("The %d columns %d-%d "
473 "can't be evenly divided into %d fields."),
474 lc - fc + 1, fc, lc, fx->name_cnt);
478 /* Format specifier. */
481 struct fmt_desc *fdp;
487 input.type = parse_format_specifier_name (&cp, 0);
488 if (input.type == -1)
492 msg (SE, _("A format specifier on this line "
493 "has extra characters on the end."));
503 if (lex_is_integer ())
505 if (lex_integer () < 1)
507 msg (SE, _("The value for number of decimal places "
508 "must be at least 1."));
512 input.d = lex_integer ();
518 fdp = &formats[input.type];
519 if (fdp->n_args < 2 && input.d)
521 msg (SE, _("Input format %s doesn't accept decimal places."),
529 if (!lex_force_match (')'))
537 if (!check_input_specifier (&input, 1))
540 /* Start column for next specification. */
543 /* Width of variables to create. */
544 if (input.type == FMT_A || input.type == FMT_AHEX)
549 /* Create variables and var specs. */
550 for (i = 0; i < fx->name_cnt; i++)
552 struct dls_var_spec *spec;
555 v = dict_create_var (default_dict, fx->name[i], width);
558 convert_fmt_ItoO (&input, &v->print);
560 if (!case_source_is_complex (vfm_source))
565 v = dict_lookup_var_assert (default_dict, fx->name[i]);
566 if (vfm_source == NULL)
568 msg (SE, _("%s is a duplicate variable name."), fx->name[i]);
571 if ((width != 0) != (v->width != 0))
573 msg (SE, _("There is already a variable %s of a "
578 if (width != 0 && width != v->width)
580 msg (SE, _("There is already a string variable %s of a "
581 "different width."), fx->name[i]);
586 spec = xmalloc (sizeof *spec);
590 spec->rec = fx->recno;
591 spec->fc = fc + input.w * i;
592 spec->lc = spec->fc + input.w - 1;
593 append_var_spec (first, last, spec);
598 /* Destroy format list F and, if RECURSE is nonzero, all its
601 destroy_fmt_list (struct fmt_list *f, int recurse)
603 struct fmt_list *next;
608 if (recurse && f->f.type == FMT_DESCEND)
609 destroy_fmt_list (f->down, 1);
614 /* Takes a hierarchically structured fmt_list F as constructed by
615 fixed_parse_fortran(), and flattens it, adding the variable
616 specifications to the linked list with head FIRST and tail
617 LAST. NAME_IDX is used to take values from the list of names
618 in FX; it should initially point to a value of 0. */
620 dump_fmt_list (struct fixed_parsing_state *fx, struct fmt_list *f,
621 struct dls_var_spec **first, struct dls_var_spec **last,
626 for (; f; f = f->next)
627 if (f->f.type == FMT_X)
629 else if (f->f.type == FMT_T)
631 else if (f->f.type == FMT_NEWREC)
633 fx->recno += f->count;
637 for (i = 0; i < f->count; i++)
638 if (f->f.type == FMT_DESCEND)
640 if (!dump_fmt_list (fx, f->down, first, last, name_idx))
645 struct dls_var_spec *spec;
649 if (formats[f->f.type].cat & FCAT_STRING)
653 if (*name_idx >= fx->name_cnt)
655 msg (SE, _("The number of format "
656 "specifications exceeds the given number of "
661 v = dict_create_var (default_dict, fx->name[(*name_idx)++], width);
664 msg (SE, _("%s is a duplicate variable name."), fx->name[i]);
668 if (!case_source_is_complex (vfm_source))
671 spec = xmalloc (sizeof *spec);
675 spec->rec = fx->recno;
677 spec->lc = fx->sc + f->f.w - 1;
678 append_var_spec (first, last, spec);
680 convert_fmt_ItoO (&spec->input, &v->print);
688 /* Recursively parses a FORTRAN-like format specification into
689 the linked list with head FIRST and tail TAIL. LEVEL is the
690 level of recursion, starting from 0. Returns the parsed
691 specification if successful, or a null pointer on failure. */
692 static struct fmt_list *
693 fixed_parse_fortran_internal (struct fixed_parsing_state *fx,
694 struct dls_var_spec **first,
695 struct dls_var_spec **last)
697 struct fmt_list *head = NULL;
698 struct fmt_list *tail = NULL;
700 lex_force_match ('(');
704 struct fmt_list *new = xmalloc (sizeof *new);
707 /* Append new to list. */
715 if (lex_is_integer ())
717 new->count = lex_integer ();
723 /* Parse format specifier. */
726 new->f.type = FMT_DESCEND;
727 new->down = fixed_parse_fortran_internal (fx, first, last);
728 if (new->down == NULL)
731 else if (lex_match ('/'))
732 new->f.type = FMT_NEWREC;
733 else if (!parse_format_specifier (&new->f, FMTP_ALLOW_XT)
734 || !check_input_specifier (&new->f, 1))
739 lex_force_match (')');
744 destroy_fmt_list (head, 0);
749 /* Parses a FORTRAN-like format specification into the linked
750 list with head FIRST and tail LAST. Returns nonzero if
753 fixed_parse_fortran (struct fixed_parsing_state *fx,
754 struct dls_var_spec **first, struct dls_var_spec **last)
756 struct fmt_list *list;
759 list = fixed_parse_fortran_internal (fx, first, last);
764 dump_fmt_list (fx, list, first, last, &name_idx);
765 destroy_fmt_list (list, 1);
766 if (name_idx < fx->name_cnt)
768 msg (SE, _("There aren't enough format specifications "
769 "to match the number of variable names given."));
776 /* Displays a table giving information on fixed-format variable
777 parsing on DATA LIST. */
778 /* FIXME: The `Columns' column should be divided into three columns,
779 one for the starting column, one for the dash, one for the ending
780 column; then right-justify the starting column and left-justify the
783 dump_fixed_table (const struct dls_var_spec *specs,
784 const struct file_handle *fh, int rec_cnt)
786 const struct dls_var_spec *spec;
790 for (i = 0, spec = specs; spec; spec = spec->next)
792 t = tab_create (4, i + 1, 0);
793 tab_columns (t, TAB_COL_DOWN, 1);
794 tab_headers (t, 0, 0, 1, 0);
795 tab_text (t, 0, 0, TAB_CENTER | TAT_TITLE, _("Variable"));
796 tab_text (t, 1, 0, TAB_CENTER | TAT_TITLE, _("Record"));
797 tab_text (t, 2, 0, TAB_CENTER | TAT_TITLE, _("Columns"));
798 tab_text (t, 3, 0, TAB_CENTER | TAT_TITLE, _("Format"));
799 tab_box (t, TAL_1, TAL_1, TAL_0, TAL_1, 0, 0, 3, i);
800 tab_hline (t, TAL_2, 0, 3, 1);
801 tab_dim (t, tab_natural_dimensions);
803 for (i = 1, spec = specs; spec; spec = spec->next, i++)
805 tab_text (t, 0, i, TAB_LEFT, spec->v->name);
806 tab_text (t, 1, i, TAT_PRINTF, "%d", spec->rec);
807 tab_text (t, 2, i, TAT_PRINTF, "%3d-%3d",
809 tab_text (t, 3, i, TAB_LEFT | TAT_FIX,
810 fmt_to_string (&spec->input));
814 tab_title (t, 1, ngettext ("Reading %d record from file %s.",
815 "Reading %d records from file %s.", rec_cnt),
816 rec_cnt, handle_get_filename (fh));
818 tab_title (t, 1, ngettext ("Reading %d record from the command file.",
819 "Reading %d records from the command file.",
825 /* Free-format parsing. */
827 /* Parses variable specifications for DATA LIST FREE and adds
828 them to the linked list with head FIRST and tail LAST.
829 Returns nonzero only if successful. */
831 parse_free (struct dls_var_spec **first, struct dls_var_spec **last)
836 struct fmt_spec input, output;
842 if (!parse_DATA_LIST_vars (&name, &name_cnt, PV_NONE))
847 if (!parse_format_specifier (&input, 0)
848 || !check_input_specifier (&input, 1)
849 || !lex_force_match (')'))
851 for (i = 0; i < name_cnt; i++)
856 convert_fmt_ItoO (&input, &output);
864 output = get_format();
867 if (input.type == FMT_A || input.type == FMT_AHEX)
871 for (i = 0; i < name_cnt; i++)
873 struct dls_var_spec *spec;
876 v = dict_create_var (default_dict, name[i], width);
880 msg (SE, _("%s is a duplicate variable name."), name[i]);
883 v->print = v->write = output;
885 if (!case_source_is_complex (vfm_source))
888 spec = xmalloc (sizeof *spec);
892 strcpy (spec->name, v->name);
893 append_var_spec (first, last, spec);
895 for (i = 0; i < name_cnt; i++)
901 lex_error (_("expecting end of command"));
905 /* Displays a table giving information on free-format variable parsing
908 dump_free_table (const struct data_list_pgm *dls,
909 const struct file_handle *fh)
915 struct dls_var_spec *spec;
916 for (i = 0, spec = dls->first; spec; spec = spec->next)
920 t = tab_create (2, i + 1, 0);
921 tab_columns (t, TAB_COL_DOWN, 1);
922 tab_headers (t, 0, 0, 1, 0);
923 tab_text (t, 0, 0, TAB_CENTER | TAT_TITLE, _("Variable"));
924 tab_text (t, 1, 0, TAB_CENTER | TAT_TITLE, _("Format"));
925 tab_box (t, TAL_1, TAL_1, TAL_0, TAL_1, 0, 0, 1, i);
926 tab_hline (t, TAL_2, 0, 1, 1);
927 tab_dim (t, tab_natural_dimensions);
930 struct dls_var_spec *spec;
932 for (i = 1, spec = dls->first; spec; spec = spec->next, i++)
934 tab_text (t, 0, i, TAB_LEFT, spec->v->name);
935 tab_text (t, 1, i, TAB_LEFT | TAT_FIX, fmt_to_string (&spec->input));
940 tab_title (t, 1, _("Reading free-form data from file %s."),
941 handle_get_filename (fh));
943 tab_title (t, 1, _("Reading free-form data from the command file."));
948 /* Input procedure. */
950 /* Extracts a field from the current position in the current
951 record. Fields can be unquoted or quoted with single- or
952 double-quote characters. *FIELD is set to the field content.
953 After parsing the field, sets the current position in the
954 record to just past the field and any trailing delimiter.
955 END_BLANK is used internally; it should be initialized by the
956 caller to 0 and left alone afterward. Returns 0 on failure or
957 a 1-based column number indicating the beginning of the field
960 cut_field (const struct data_list_pgm *dls, struct fixed_string *field,
963 struct fixed_string line;
967 if (dfm_eof (dls->reader))
969 if (dls->delim_cnt == 0)
970 dfm_expand_tabs (dls->reader);
971 dfm_get_record (dls->reader, &line);
973 cp = ls_c_str (&line);
974 if (dls->delim_cnt == 0)
976 /* Skip leading whitespace. */
977 while (cp < ls_end (&line) && isspace ((unsigned char) *cp))
979 if (cp >= ls_end (&line))
982 /* Handle actual data, whether quoted or unquoted. */
983 if (*cp == '\'' || *cp == '"')
987 field->string = ++cp;
988 while (cp < ls_end (&line) && *cp != quote)
990 field->length = cp - field->string;
991 if (cp < ls_end (&line))
994 msg (SW, _("Quoted string missing terminating `%c'."), quote);
999 while (cp < ls_end (&line)
1000 && !isspace ((unsigned char) *cp) && *cp != ',')
1002 field->length = cp - field->string;
1005 /* Skip trailing whitespace and a single comma if present. */
1006 while (cp < ls_end (&line) && isspace ((unsigned char) *cp))
1008 if (cp < ls_end (&line) && *cp == ',')
1013 if (cp >= ls_end (&line))
1015 int column = dfm_column_start (dls->reader);
1016 /* A blank line or a line that ends in \t has a
1017 trailing blank field. */
1018 if (column == 1 || (column > 1 && cp[-1] == '\t'))
1020 if (*end_blank == 0)
1023 field->string = ls_end (&line);
1025 dfm_forward_record (dls->reader);
1040 while (cp < ls_end (&line)
1041 && memchr (dls->delims, *cp, dls->delim_cnt) == NULL)
1043 field->length = cp - field->string;
1044 if (cp < ls_end (&line))
1049 dfm_forward_columns (dls->reader, field->string - line.string);
1050 column_start = dfm_column_start (dls->reader);
1052 dfm_forward_columns (dls->reader, cp - field->string);
1054 return column_start;
1057 typedef int data_list_read_func (const struct data_list_pgm *, struct ccase *);
1058 static data_list_read_func read_from_data_list_fixed;
1059 static data_list_read_func read_from_data_list_free;
1060 static data_list_read_func read_from_data_list_list;
1062 /* Returns the proper function to read the kind of DATA LIST
1063 data specified by DLS. */
1064 static data_list_read_func *
1065 get_data_list_read_func (const struct data_list_pgm *dls)
1070 return read_from_data_list_fixed;
1073 return read_from_data_list_free;
1076 return read_from_data_list_list;
1084 /* Reads a case from the data file into C, parsing it according
1085 to fixed-format syntax rules in DLS. Returns -1 on success,
1086 -2 at end of file. */
1088 read_from_data_list_fixed (const struct data_list_pgm *dls,
1091 struct dls_var_spec *var_spec = dls->first;
1094 if (dfm_eof (dls->reader))
1096 for (i = 1; i <= dls->rec_cnt; i++)
1098 struct fixed_string line;
1100 if (dfm_eof (dls->reader))
1102 /* Note that this can't occur on the first record. */
1103 msg (SW, _("Partial case of %d of %d records discarded."),
1104 i - 1, dls->rec_cnt);
1107 dfm_expand_tabs (dls->reader);
1108 dfm_get_record (dls->reader, &line);
1110 for (; var_spec && i == var_spec->rec; var_spec = var_spec->next)
1114 data_in_finite_line (&di, ls_c_str (&line), ls_length (&line),
1115 var_spec->fc, var_spec->lc);
1116 di.v = case_data_rw (c, var_spec->fv);
1117 di.flags = DI_IMPLIED_DECIMALS;
1118 di.f1 = var_spec->fc;
1119 di.format = var_spec->input;
1124 dfm_forward_record (dls->reader);
1130 /* Reads a case from the data file into C, parsing it according
1131 to free-format syntax rules in DLS. Returns -1 on success,
1132 -2 at end of file. */
1134 read_from_data_list_free (const struct data_list_pgm *dls,
1137 struct dls_var_spec *var_spec;
1140 for (var_spec = dls->first; var_spec; var_spec = var_spec->next)
1142 struct fixed_string field;
1145 /* Cut out a field and read in a new record if necessary. */
1148 column = cut_field (dls, &field, &end_blank);
1152 if (!dfm_eof (dls->reader))
1153 dfm_forward_record (dls->reader);
1154 if (dfm_eof (dls->reader))
1156 if (var_spec != dls->first)
1157 msg (SW, _("Partial case discarded. The first variable "
1158 "missing was %s."), var_spec->name);
1166 di.s = ls_c_str (&field);
1167 di.e = ls_end (&field);
1168 di.v = case_data_rw (c, var_spec->fv);
1171 di.format = var_spec->input;
1178 /* Reads a case from the data file and parses it according to
1179 list-format syntax rules. Returns -1 on success, -2 at end of
1182 read_from_data_list_list (const struct data_list_pgm *dls,
1185 struct dls_var_spec *var_spec;
1188 if (dfm_eof (dls->reader))
1191 for (var_spec = dls->first; var_spec; var_spec = var_spec->next)
1193 struct fixed_string field;
1196 /* Cut out a field and check for end-of-line. */
1197 column = cut_field (dls, &field, &end_blank);
1200 if (get_undefined ())
1201 msg (SW, _("Missing value(s) for all variables from %s onward. "
1202 "These will be filled with the system-missing value "
1203 "or blanks, as appropriate."),
1205 for (; var_spec; var_spec = var_spec->next)
1207 int width = get_format_var_width (&var_spec->input);
1209 case_data_rw (c, var_spec->fv)->f = SYSMIS;
1211 memset (case_data_rw (c, var_spec->fv)->s, ' ', width);
1219 di.s = ls_c_str (&field);
1220 di.e = ls_end (&field);
1221 di.v = case_data_rw (c, var_spec->fv);
1224 di.format = var_spec->input;
1229 dfm_forward_record (dls->reader);
1233 /* Destroys SPEC. */
1235 destroy_dls_var_spec (struct dls_var_spec *spec)
1237 struct dls_var_spec *next;
1239 while (spec != NULL)
1247 /* Destroys DATA LIST transformation PGM. */
1249 data_list_trns_free (struct trns_header *pgm)
1251 struct data_list_pgm *dls = (struct data_list_pgm *) pgm;
1253 destroy_dls_var_spec (dls->first);
1254 dfm_close_reader (dls->reader);
1258 /* Handle DATA LIST transformation T, parsing data into C. */
1260 data_list_trns_proc (struct trns_header *t, struct ccase *c,
1261 int case_num UNUSED)
1263 struct data_list_pgm *dls = (struct data_list_pgm *) t;
1264 data_list_read_func *read_func;
1267 dfm_push (dls->reader);
1269 read_func = get_data_list_read_func (dls);
1270 retval = read_func (dls, c);
1272 /* Handle end of file. */
1275 /* If we already encountered end of file then this is an
1279 msg (SE, _("Attempt to read past end of file."));
1281 dfm_pop (dls->reader);
1285 /* Otherwise simply note it. */
1291 /* If there was an END subcommand handle it. */
1292 if (dls->end != NULL)
1296 case_data_rw (c, dls->end->fv)->f = 1.0;
1300 case_data_rw (c, dls->end->fv)->f = 0.0;
1303 dfm_pop (dls->reader);
1308 /* Reads all the records from the data file and passes them to
1311 data_list_source_read (struct case_source *source,
1313 write_case_func *write_case, write_case_data wc_data)
1315 struct data_list_pgm *dls = source->aux;
1316 data_list_read_func *read_func = get_data_list_read_func (dls);
1318 dfm_push (dls->reader);
1319 while (read_func (dls, c) != -2)
1320 if (!write_case (wc_data))
1322 dfm_pop (dls->reader);
1325 /* Destroys the source's internal data. */
1327 data_list_source_destroy (struct case_source *source)
1329 data_list_trns_free (source->aux);
1332 const struct case_source_class data_list_source_class =
1336 data_list_source_read,
1337 data_list_source_destroy,
1340 /* REPEATING DATA. */
1342 /* Represents a number or a variable. */
1343 struct rpd_num_or_var
1345 int num; /* Value, or 0. */
1346 struct variable *var; /* Variable, if number==0. */
1349 /* REPEATING DATA private data structure. */
1350 struct repeating_data_trns
1352 struct trns_header h;
1353 struct dls_var_spec *first, *last; /* Variable parsing specifications. */
1354 struct dfm_reader *reader; /* Input file, never NULL. */
1356 struct rpd_num_or_var starts_beg; /* STARTS=, before the dash. */
1357 struct rpd_num_or_var starts_end; /* STARTS=, after the dash. */
1358 struct rpd_num_or_var occurs; /* OCCURS= subcommand. */
1359 struct rpd_num_or_var length; /* LENGTH= subcommand. */
1360 struct rpd_num_or_var cont_beg; /* CONTINUED=, before the dash. */
1361 struct rpd_num_or_var cont_end; /* CONTINUED=, after the dash. */
1363 /* ID subcommand. */
1364 int id_beg, id_end; /* Beginning & end columns. */
1365 struct variable *id_var; /* DATA LIST variable. */
1366 struct fmt_spec id_spec; /* Input format spec. */
1367 union value *id_value; /* ID value. */
1369 write_case_func *write_case;
1370 write_case_data wc_data;
1373 static trns_free_func repeating_data_trns_free;
1374 static int parse_num_or_var (struct rpd_num_or_var *, const char *);
1375 static int parse_repeating_data (struct dls_var_spec **,
1376 struct dls_var_spec **);
1377 static void find_variable_input_spec (struct variable *v,
1378 struct fmt_spec *spec);
1380 /* Parses the REPEATING DATA command. */
1382 cmd_repeating_data (void)
1384 struct repeating_data_trns *rpd;
1385 int table = 1; /* Print table? */
1386 unsigned seen = 0; /* Mark subcommands as already seen. */
1387 struct file_handle *const fh = default_handle;
1389 assert (case_source_is_complex (vfm_source));
1391 rpd = xmalloc (sizeof *rpd);
1392 rpd->reader = dfm_open_reader (default_handle);
1393 rpd->first = rpd->last = NULL;
1394 rpd->starts_beg.num = 0;
1395 rpd->starts_beg.var = NULL;
1396 rpd->starts_end = rpd->occurs = rpd->length = rpd->cont_beg
1397 = rpd->cont_end = rpd->starts_beg;
1398 rpd->id_beg = rpd->id_end = 0;
1400 rpd->id_value = NULL;
1406 if (lex_match_id ("FILE"))
1408 struct file_handle *file;
1415 msg (SE, _("REPEATING DATA must use the same file as its "
1416 "corresponding DATA LIST or FILE TYPE."));
1420 else if (lex_match_id ("STARTS"))
1425 msg (SE, _("%s subcommand given multiple times."),"STARTS");
1430 if (!parse_num_or_var (&rpd->starts_beg, "STARTS beginning column"))
1433 lex_negative_to_dash ();
1434 if (lex_match ('-'))
1436 if (!parse_num_or_var (&rpd->starts_end, "STARTS ending column"))
1439 /* Otherwise, rpd->starts_end is left uninitialized.
1440 This is okay. We will initialize it later from the
1441 record length of the file. We can't do this now
1442 because we can't be sure that the user has specified
1443 the file handle yet. */
1446 if (rpd->starts_beg.num != 0 && rpd->starts_end.num != 0
1447 && rpd->starts_beg.num > rpd->starts_end.num)
1449 msg (SE, _("STARTS beginning column (%d) exceeds "
1450 "STARTS ending column (%d)."),
1451 rpd->starts_beg.num, rpd->starts_end.num);
1455 else if (lex_match_id ("OCCURS"))
1460 msg (SE, _("%s subcommand given multiple times."),"OCCURS");
1465 if (!parse_num_or_var (&rpd->occurs, "OCCURS"))
1468 else if (lex_match_id ("LENGTH"))
1473 msg (SE, _("%s subcommand given multiple times."),"LENGTH");
1478 if (!parse_num_or_var (&rpd->length, "LENGTH"))
1481 else if (lex_match_id ("CONTINUED"))
1486 msg (SE, _("%s subcommand given multiple times."),"CONTINUED");
1491 if (!lex_match ('/'))
1493 if (!parse_num_or_var (&rpd->cont_beg, "CONTINUED beginning column"))
1496 lex_negative_to_dash ();
1498 && !parse_num_or_var (&rpd->cont_end,
1499 "CONTINUED ending column"))
1502 if (rpd->cont_beg.num != 0 && rpd->cont_end.num != 0
1503 && rpd->cont_beg.num > rpd->cont_end.num)
1505 msg (SE, _("CONTINUED beginning column (%d) exceeds "
1506 "CONTINUED ending column (%d)."),
1507 rpd->cont_beg.num, rpd->cont_end.num);
1512 rpd->cont_beg.num = 1;
1514 else if (lex_match_id ("ID"))
1519 msg (SE, _("%s subcommand given multiple times."),"ID");
1524 if (!lex_force_int ())
1526 if (lex_integer () < 1)
1528 msg (SE, _("ID beginning column (%ld) must be positive."),
1532 rpd->id_beg = lex_integer ();
1535 lex_negative_to_dash ();
1537 if (lex_match ('-'))
1539 if (!lex_force_int ())
1541 if (lex_integer () < 1)
1543 msg (SE, _("ID ending column (%ld) must be positive."),
1547 if (lex_integer () < rpd->id_end)
1549 msg (SE, _("ID ending column (%ld) cannot be less than "
1550 "ID beginning column (%d)."),
1551 lex_integer (), rpd->id_beg);
1555 rpd->id_end = lex_integer ();
1558 else rpd->id_end = rpd->id_beg;
1560 if (!lex_force_match ('='))
1562 rpd->id_var = parse_variable ();
1563 if (rpd->id_var == NULL)
1566 find_variable_input_spec (rpd->id_var, &rpd->id_spec);
1567 rpd->id_value = xmalloc (sizeof *rpd->id_value * rpd->id_var->nv);
1569 else if (lex_match_id ("TABLE"))
1571 else if (lex_match_id ("NOTABLE"))
1573 else if (lex_match_id ("DATA"))
1581 if (!lex_force_match ('/'))
1585 /* Comes here when DATA specification encountered. */
1586 if ((seen & (1 | 2)) != (1 | 2))
1588 if ((seen & 1) == 0)
1589 msg (SE, _("Missing required specification STARTS."));
1590 if ((seen & 2) == 0)
1591 msg (SE, _("Missing required specification OCCURS."));
1595 /* Enforce ID restriction. */
1596 if ((seen & 16) && !(seen & 8))
1598 msg (SE, _("ID specified without CONTINUED."));
1602 /* Calculate starts_end, cont_end if necessary. */
1603 if (rpd->starts_end.num == 0 && rpd->starts_end.var == NULL)
1604 rpd->starts_end.num = handle_get_record_width (fh);
1605 if (rpd->cont_end.num == 0 && rpd->starts_end.var == NULL)
1606 rpd->cont_end.num = handle_get_record_width (fh);
1608 /* Calculate length if possible. */
1609 if ((seen & 4) == 0)
1611 struct dls_var_spec *iter;
1613 for (iter = rpd->first; iter; iter = iter->next)
1615 if (iter->lc > rpd->length.num)
1616 rpd->length.num = iter->lc;
1618 assert (rpd->length.num != 0);
1622 if (!parse_repeating_data (&rpd->first, &rpd->last))
1626 dump_fixed_table (rpd->first, fh, rpd->last->rec);
1629 struct repeating_data_trns *new_trns;
1631 rpd->h.proc = repeating_data_trns_proc;
1632 rpd->h.free = repeating_data_trns_free;
1634 new_trns = xmalloc (sizeof *new_trns);
1635 memcpy (new_trns, &rpd, sizeof *new_trns);
1636 add_transformation ((struct trns_header *) new_trns);
1639 return lex_end_of_command ();
1642 destroy_dls_var_spec (rpd->first);
1643 free (rpd->id_value);
1647 /* Finds the input format specification for variable V and puts
1648 it in SPEC. Because of the way that DATA LIST is structured,
1649 this is nontrivial. */
1651 find_variable_input_spec (struct variable *v, struct fmt_spec *spec)
1655 for (i = 0; i < n_trns; i++)
1657 struct data_list_pgm *pgm = (struct data_list_pgm *) t_trns[i];
1659 if (pgm->h.proc == data_list_trns_proc)
1661 struct dls_var_spec *iter;
1663 for (iter = pgm->first; iter; iter = iter->next)
1666 *spec = iter->input;
1675 /* Parses a number or a variable name from the syntax file and puts
1676 the results in VALUE. Ensures that the number is at least 1; else
1677 emits an error based on MESSAGE. Returns nonzero only if
1680 parse_num_or_var (struct rpd_num_or_var *value, const char *message)
1685 value->var = parse_variable ();
1686 if (value->var == NULL)
1688 if (value->var->type == ALPHA)
1690 msg (SE, _("String variable not allowed here."));
1694 else if (lex_is_integer ())
1696 value->num = lex_integer ();
1700 msg (SE, _("%s (%d) must be at least 1."), message, value->num);
1706 msg (SE, _("Variable or integer expected for %s."), message);
1712 /* Parses data specifications for repeating data groups, adding
1713 them to the linked list with head FIRST and tail LAST.
1714 Returns nonzero only if successful. */
1716 parse_repeating_data (struct dls_var_spec **first, struct dls_var_spec **last)
1718 struct fixed_parsing_state fx;
1724 while (token != '.')
1726 if (!parse_DATA_LIST_vars (&fx.name, &fx.name_cnt, PV_NONE))
1729 if (lex_is_number ())
1731 if (!fixed_parse_compatible (&fx, first, last))
1734 else if (token == '(')
1736 if (!fixed_parse_fortran (&fx, first, last))
1741 msg (SE, _("SPSS-like or FORTRAN-like format "
1742 "specification expected after variable names."));
1746 for (i = 0; i < fx.name_cnt; i++)
1752 lex_error (_("expecting end of command"));
1759 for (i = 0; i < fx.name_cnt; i++)
1765 /* Obtains the real value for rpd_num_or_var N in case C and returns
1766 it. The valid range is nonnegative numbers, but numbers outside
1767 this range can be returned and should be handled by the caller as
1770 realize_value (struct rpd_num_or_var *n, struct ccase *c)
1775 assert (n->num == 0);
1778 double v = case_num (c, n->var->fv);
1780 if (v == SYSMIS || v <= INT_MIN || v >= INT_MAX)
1789 /* Parameter record passed to rpd_parse_record(). */
1790 struct rpd_parse_info
1792 struct repeating_data_trns *trns; /* REPEATING DATA transformation. */
1793 const char *line; /* Line being parsed. */
1794 size_t len; /* Line length. */
1795 int beg, end; /* First and last column of first occurrence. */
1796 int ofs; /* Column offset between repeated occurrences. */
1797 struct ccase *c; /* Case to fill in. */
1798 int verify_id; /* Zero to initialize ID, nonzero to verify it. */
1799 int max_occurs; /* Max number of occurrences to parse. */
1802 /* Parses one record of repeated data and outputs corresponding
1803 cases. Returns number of occurrences parsed up to the
1804 maximum specified in INFO. */
1806 rpd_parse_record (const struct rpd_parse_info *info)
1808 struct repeating_data_trns *t = info->trns;
1809 int cur = info->beg;
1812 /* Handle record ID values. */
1815 union value id_temp[MAX_ELEMS_PER_VALUE];
1817 /* Parse record ID into V. */
1821 data_in_finite_line (&di, info->line, info->len, t->id_beg, t->id_end);
1822 di.v = info->verify_id ? id_temp : t->id_value;
1825 di.format = t->id_spec;
1832 && compare_values (id_temp, t->id_value, t->id_var->width) != 0)
1834 char expected_str [MAX_FORMATTED_LEN + 1];
1835 char actual_str [MAX_FORMATTED_LEN + 1];
1837 data_out (expected_str, &t->id_var->print, t->id_value);
1838 expected_str[t->id_var->print.w] = '\0';
1840 data_out (actual_str, &t->id_var->print, id_temp);
1841 actual_str[t->id_var->print.w] = '\0';
1844 _("Encountered mismatched record ID \"%s\" expecting \"%s\"."),
1845 actual_str, expected_str);
1851 /* Iterate over the set of expected occurrences and record each of
1852 them as a separate case. FIXME: We need to execute any
1853 transformations that follow the current one. */
1857 for (occurrences = 0; occurrences < info->max_occurs; )
1859 if (cur + info->ofs > info->end + 1)
1864 struct dls_var_spec *var_spec = t->first;
1866 for (; var_spec; var_spec = var_spec->next)
1868 int fc = var_spec->fc - 1 + cur;
1869 int lc = var_spec->lc - 1 + cur;
1871 if (fc > info->len && !warned && var_spec->input.type != FMT_A)
1876 _("Variable %s starting in column %d extends "
1877 "beyond physical record length of %d."),
1878 var_spec->v->name, fc, info->len);
1884 data_in_finite_line (&di, info->line, info->len, fc, lc);
1885 di.v = case_data_rw (info->c, var_spec->fv);
1888 di.format = var_spec->input;
1898 if (!t->write_case (t->wc_data))
1906 /* Reads one set of repetitions of the elements in the REPEATING
1907 DATA structure. Returns -1 on success, -2 on end of file or
1910 repeating_data_trns_proc (struct trns_header *trns, struct ccase *c,
1911 int case_num UNUSED)
1913 struct repeating_data_trns *t = (struct repeating_data_trns *) trns;
1915 struct fixed_string line; /* Current record. */
1917 int starts_beg; /* Starting column. */
1918 int starts_end; /* Ending column. */
1919 int occurs; /* Number of repetitions. */
1920 int length; /* Length of each occurrence. */
1921 int cont_beg; /* Starting column for continuation lines. */
1922 int cont_end; /* Ending column for continuation lines. */
1924 int occurs_left; /* Number of occurrences remaining. */
1926 int code; /* Return value from rpd_parse_record(). */
1928 int skip_first_record = 0;
1930 dfm_push (t->reader);
1932 /* Read the current record. */
1933 dfm_reread_record (t->reader, 1);
1934 dfm_expand_tabs (t->reader);
1935 if (dfm_eof (t->reader))
1937 dfm_get_record (t->reader, &line);
1938 dfm_forward_record (t->reader);
1940 /* Calculate occurs, length. */
1941 occurs_left = occurs = realize_value (&t->occurs, c);
1944 tmsg (SE, RPD_ERR, _("Invalid value %d for OCCURS."), occurs);
1947 starts_beg = realize_value (&t->starts_beg, c);
1948 if (starts_beg <= 0)
1950 tmsg (SE, RPD_ERR, _("Beginning column for STARTS (%d) must be "
1955 starts_end = realize_value (&t->starts_end, c);
1956 if (starts_end < starts_beg)
1958 tmsg (SE, RPD_ERR, _("Ending column for STARTS (%d) is less than "
1959 "beginning column (%d)."),
1960 starts_end, starts_beg);
1961 skip_first_record = 1;
1963 length = realize_value (&t->length, c);
1966 tmsg (SE, RPD_ERR, _("Invalid value %d for LENGTH."), length);
1968 occurs = occurs_left = 1;
1970 cont_beg = realize_value (&t->cont_beg, c);
1973 tmsg (SE, RPD_ERR, _("Beginning column for CONTINUED (%d) must be "
1978 cont_end = realize_value (&t->cont_end, c);
1979 if (cont_end < cont_beg)
1981 tmsg (SE, RPD_ERR, _("Ending column for CONTINUED (%d) is less than "
1982 "beginning column (%d)."),
1983 cont_end, cont_beg);
1987 /* Parse the first record. */
1988 if (!skip_first_record)
1990 struct rpd_parse_info info;
1992 info.line = ls_c_str (&line);
1993 info.len = ls_length (&line);
1994 info.beg = starts_beg;
1995 info.end = starts_end;
1999 info.max_occurs = occurs_left;
2000 code = rpd_parse_record (&info);
2003 occurs_left -= code;
2005 else if (cont_beg == 0)
2008 /* Make sure, if some occurrences are left, that we have
2009 continuation records. */
2010 if (occurs_left > 0 && cont_beg == 0)
2013 _("Number of repetitions specified on OCCURS (%d) "
2014 "exceed number of repetitions available in "
2015 "space on STARTS (%d), and CONTINUED not specified."),
2016 occurs, (starts_end - starts_beg + 1) / length);
2020 /* Go on to additional records. */
2021 while (occurs_left != 0)
2023 struct rpd_parse_info info;
2025 assert (occurs_left >= 0);
2027 /* Read in another record. */
2028 if (dfm_eof (t->reader))
2031 _("Unexpected end of file with %d repetitions "
2032 "remaining out of %d."),
2033 occurs_left, occurs);
2036 dfm_expand_tabs (t->reader);
2037 dfm_get_record (t->reader, &line);
2038 dfm_forward_record (t->reader);
2040 /* Parse this record. */
2042 info.line = ls_c_str (&line);
2043 info.len = ls_length (&line);
2044 info.beg = cont_beg;
2045 info.end = cont_end;
2049 info.max_occurs = occurs_left;
2050 code = rpd_parse_record (&info);;
2053 occurs_left -= code;
2056 dfm_pop (t->reader);
2058 /* FIXME: This is a kluge until we've implemented multiplexing of
2063 /* Frees a REPEATING DATA transformation. */
2065 repeating_data_trns_free (struct trns_header *rpd_)
2067 struct repeating_data_trns *rpd = (struct repeating_data_trns *) rpd_;
2069 destroy_dls_var_spec (rpd->first);
2070 dfm_close_reader (rpd->reader);
2071 free (rpd->id_value);
2074 /* Lets repeating_data_trns_proc() know how to write the cases
2075 that it composes. Not elegant. */
2077 repeating_data_set_write_case (struct trns_header *trns,
2078 write_case_func *write_case,
2079 write_case_data wc_data)
2081 struct repeating_data_trns *t = (struct repeating_data_trns *) trns;
2083 assert (trns->proc == repeating_data_trns_proc);
2084 t->write_case = write_case;
2085 t->wc_data = wc_data;