1 /* PSPP - computes sample statistics.
2 Copyright (C) 1997-9, 2000 Free Software Foundation, Inc.
3 Written by Ben Pfaff <blp@gnu.org>.
5 This program is free software; you can redistribute it and/or
6 modify it under the terms of the GNU General Public License as
7 published by the Free Software Foundation; either version 2 of the
8 License, or (at your option) any later version.
10 This program is distributed in the hope that it will be useful, but
11 WITHOUT ANY WARRANTY; without even the implied warranty of
12 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
13 General Public License for more details.
15 You should have received a copy of the GNU General Public License
16 along with this program; if not, write to the Free Software
17 Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA
21 #include "data-list.h"
31 #include "debug-print.h"
33 #include "dictionary.h"
35 #include "file-handle.h"
45 /* Utility function. */
47 /* FIXME: Either REPEATING DATA must be the last transformation, or we
48 must multiplex the transformations that follow (i.e., perform them
49 for every case that we produce from a repetition instance).
50 Currently we do neither. We should do one or the other. */
52 /* Describes how to parse one variable. */
55 struct dls_var_spec *next; /* Next specification in list. */
57 /* Both free and fixed formats. */
58 struct fmt_spec input; /* Input format of this field. */
59 struct variable *v; /* Associated variable. Used only in
60 parsing. Not safe later. */
61 int fv; /* First value in case. */
63 /* Fixed format only. */
64 int rec; /* Record number (1-based). */
65 int fc, lc; /* Column numbers in record. */
67 /* Free format only. */
68 char name[9]; /* Name of variable. */
71 /* Constants for DATA LIST type. */
72 /* Must match table in cmd_data_list(). */
80 /* DATA LIST private data structure. */
85 struct dls_var_spec *first, *last; /* Variable parsing specifications. */
86 struct dfm_reader *reader; /* Data file reader. */
88 int type; /* A DLS_* constant. */
89 struct variable *end; /* Variable specified on END subcommand. */
90 int eof; /* End of file encountered. */
91 int rec_cnt; /* Number of records. */
92 size_t case_size; /* Case size in bytes. */
93 char *delims; /* Delimiters if any; not null-terminated. */
94 size_t delim_cnt; /* Number of delimiter, or 0 for spaces. */
97 static int parse_fixed (struct data_list_pgm *);
98 static int parse_free (struct dls_var_spec **, struct dls_var_spec **);
99 static void dump_fixed_table (const struct dls_var_spec *,
100 const struct file_handle *, int rec_cnt);
101 static void dump_free_table (const struct data_list_pgm *,
102 const struct file_handle *);
103 static void destroy_dls_var_spec (struct dls_var_spec *);
104 static trns_free_func data_list_trns_free;
105 static trns_proc_func data_list_trns_proc;
107 /* Message title for REPEATING DATA. */
108 #define RPD_ERR "REPEATING DATA: "
113 struct data_list_pgm *dls; /* DATA LIST program under construction. */
114 int table = -1; /* Print table if nonzero, -1=undecided. */
115 struct file_handle *fh = NULL; /* File handle of source, NULL=inline file. */
117 if (!case_source_is_complex (vfm_source))
118 discard_variables ();
120 dls = xmalloc (sizeof *dls);
128 dls->first = dls->last = NULL;
132 if (lex_match_id ("FILE"))
138 if (case_source_is_class (vfm_source, &file_type_source_class)
139 && fh != default_handle)
141 msg (SE, _("DATA LIST may not use a different file from "
142 "that specified on its surrounding FILE TYPE."));
146 else if (lex_match_id ("RECORDS"))
150 if (!lex_force_int ())
152 dls->rec_cnt = lex_integer ();
156 else if (lex_match_id ("END"))
160 msg (SE, _("The END subcommand may only be specified once."));
165 if (!lex_force_id ())
167 dls->end = dict_lookup_var (default_dict, tokid);
169 dls->end = dict_create_var_assert (default_dict, tokid, 0);
172 else if (token == T_ID)
174 if (lex_match_id ("NOTABLE"))
176 else if (lex_match_id ("TABLE"))
181 if (lex_match_id ("FIXED"))
183 else if (lex_match_id ("FREE"))
185 else if (lex_match_id ("LIST"))
195 msg (SE, _("Only one of FIXED, FREE, or LIST may "
201 if ((dls->type == DLS_FREE || dls->type == DLS_LIST)
204 while (!lex_match (')'))
208 if (lex_match_id ("TAB"))
210 else if (token == T_STRING && tokstr.length == 1)
212 delim = tokstr.string[0];
221 dls->delims = xrealloc (dls->delims, dls->delim_cnt + 1);
222 dls->delims[dls->delim_cnt++] = delim;
236 dls->case_size = dict_get_case_size (default_dict);
240 dls->type = DLS_FIXED;
244 if (dls->type == DLS_FREE)
250 if (dls->type == DLS_FIXED)
252 if (!parse_fixed (dls))
255 dump_fixed_table (dls->first, fh, dls->rec_cnt);
259 if (!parse_free (&dls->first, &dls->last))
262 dump_free_table (dls, fh);
265 dls->reader = dfm_open_reader (fh);
266 if (dls->reader == NULL)
269 if (vfm_source != NULL)
271 struct data_list_pgm *new_pgm;
273 dls->h.proc = data_list_trns_proc;
274 dls->h.free = data_list_trns_free;
276 new_pgm = xmalloc (sizeof *new_pgm);
277 memcpy (new_pgm, &dls, sizeof *new_pgm);
278 add_transformation (&new_pgm->h);
281 vfm_source = create_case_source (&data_list_source_class,
287 destroy_dls_var_spec (dls->first);
293 /* Adds SPEC to the linked list with head at FIRST and tail at
296 append_var_spec (struct dls_var_spec **first, struct dls_var_spec **last,
297 struct dls_var_spec *spec)
304 (*last)->next = spec;
308 /* Fixed-format parsing. */
310 /* Used for chaining together fortran-like format specifiers. */
313 struct fmt_list *next;
316 struct fmt_list *down;
319 /* State of parsing DATA LIST. */
320 struct fixed_parsing_state
322 char **name; /* Variable names. */
323 int name_cnt; /* Number of names. */
325 int recno; /* Index of current record. */
326 int sc; /* 1-based column number of starting column for
327 next field to output. */
330 static int fixed_parse_compatible (struct fixed_parsing_state *,
331 struct dls_var_spec **,
332 struct dls_var_spec **);
333 static int fixed_parse_fortran (struct fixed_parsing_state *,
334 struct dls_var_spec **,
335 struct dls_var_spec **);
337 /* Parses all the variable specifications for DATA LIST FIXED,
338 storing them into DLS. Returns nonzero if successful. */
340 parse_fixed (struct data_list_pgm *dls)
342 struct fixed_parsing_state fx;
350 while (lex_match ('/'))
353 if (lex_integer_p ())
355 if (lex_integer () < fx.recno)
357 msg (SE, _("The record number specified, %ld, is "
358 "before the previous record, %d. Data "
359 "fields must be listed in order of "
360 "increasing record number."),
361 lex_integer (), fx.recno - 1);
365 fx.recno = lex_integer ();
371 if (!parse_DATA_LIST_vars (&fx.name, &fx.name_cnt, PV_NONE))
376 if (!fixed_parse_compatible (&fx, &dls->first, &dls->last))
379 else if (token == '(')
381 if (!fixed_parse_fortran (&fx, &dls->first, &dls->last))
386 msg (SE, _("SPSS-like or FORTRAN-like format "
387 "specification expected after variable names."));
391 for (i = 0; i < fx.name_cnt; i++)
395 if (dls->first == NULL)
397 msg (SE, _("At least one variable must be specified."));
400 if (dls->rec_cnt && dls->last->rec > dls->rec_cnt)
402 msg (SE, _("Variables are specified on records that "
403 "should not exist according to RECORDS subcommand."));
406 else if (!dls->rec_cnt)
407 dls->rec_cnt = dls->last->rec;
410 lex_error (_("expecting end of command"));
416 for (i = 0; i < fx.name_cnt; i++)
422 /* Parses a variable specification in the form 1-10 (A) based on
423 FX and adds specifications to the linked list with head at
424 FIRST and tail at LAST. */
426 fixed_parse_compatible (struct fixed_parsing_state *fx,
427 struct dls_var_spec **first, struct dls_var_spec **last)
429 struct fmt_spec input;
435 if (!lex_force_int ())
440 msg (SE, _("Column positions for fields must be positive."));
446 lex_negative_to_dash ();
449 if (!lex_force_int ())
454 msg (SE, _("Column positions for fields must be positive."));
459 msg (SE, _("The ending column for a field must be "
460 "greater than the starting column."));
469 /* Divide columns evenly. */
470 input.w = (lc - fc + 1) / fx->name_cnt;
471 if ((lc - fc + 1) % fx->name_cnt)
473 msg (SE, _("The %d columns %d-%d "
474 "can't be evenly divided into %d fields."),
475 lc - fc + 1, fc, lc, fx->name_cnt);
479 /* Format specifier. */
482 struct fmt_desc *fdp;
488 input.type = parse_format_specifier_name (&cp, 0);
489 if (input.type == -1)
493 msg (SE, _("A format specifier on this line "
494 "has extra characters on the end."));
504 if (lex_integer_p ())
506 if (lex_integer () < 1)
508 msg (SE, _("The value for number of decimal places "
509 "must be at least 1."));
513 input.d = lex_integer ();
519 fdp = &formats[input.type];
520 if (fdp->n_args < 2 && input.d)
522 msg (SE, _("Input format %s doesn't accept decimal places."),
530 if (!lex_force_match (')'))
538 if (!check_input_specifier (&input))
541 /* Start column for next specification. */
544 /* Width of variables to create. */
545 if (input.type == FMT_A || input.type == FMT_AHEX)
550 /* Create variables and var specs. */
551 for (i = 0; i < fx->name_cnt; i++)
553 struct dls_var_spec *spec;
556 v = dict_create_var (default_dict, fx->name[i], width);
559 convert_fmt_ItoO (&input, &v->print);
561 if (!case_source_is_complex (vfm_source))
566 v = dict_lookup_var_assert (default_dict, fx->name[i]);
567 if (vfm_source == NULL)
569 msg (SE, _("%s is a duplicate variable name."), fx->name[i]);
572 if ((width != 0) != (v->width != 0))
574 msg (SE, _("There is already a variable %s of a "
579 if (width != 0 && width != v->width)
581 msg (SE, _("There is already a string variable %s of a "
582 "different width."), fx->name[i]);
587 spec = xmalloc (sizeof *spec);
591 spec->rec = fx->recno;
592 spec->fc = fc + input.w * i;
593 spec->lc = spec->fc + input.w - 1;
594 append_var_spec (first, last, spec);
599 /* Destroy format list F and, if RECURSE is nonzero, all its
602 destroy_fmt_list (struct fmt_list *f, int recurse)
604 struct fmt_list *next;
609 if (recurse && f->f.type == FMT_DESCEND)
610 destroy_fmt_list (f->down, 1);
615 /* Takes a hierarchically structured fmt_list F as constructed by
616 fixed_parse_fortran(), and flattens it, adding the variable
617 specifications to the linked list with head FIRST and tail
618 LAST. NAME_IDX is used to take values from the list of names
619 in FX; it should initially point to a value of 0. */
621 dump_fmt_list (struct fixed_parsing_state *fx, struct fmt_list *f,
622 struct dls_var_spec **first, struct dls_var_spec **last,
627 for (; f; f = f->next)
628 if (f->f.type == FMT_X)
630 else if (f->f.type == FMT_T)
632 else if (f->f.type == FMT_NEWREC)
634 fx->recno += f->count;
638 for (i = 0; i < f->count; i++)
639 if (f->f.type == FMT_DESCEND)
641 if (!dump_fmt_list (fx, f->down, first, last, name_idx))
646 struct dls_var_spec *spec;
650 if (formats[f->f.type].cat & FCAT_STRING)
654 if (*name_idx >= fx->name_cnt)
656 msg (SE, _("The number of format "
657 "specifications exceeds the given number of "
662 v = dict_create_var (default_dict, fx->name[(*name_idx)++], width);
665 msg (SE, _("%s is a duplicate variable name."), fx->name[i]);
669 if (!case_source_is_complex (vfm_source))
672 spec = xmalloc (sizeof *spec);
676 spec->rec = fx->recno;
678 spec->lc = fx->sc + f->f.w - 1;
679 append_var_spec (first, last, spec);
681 convert_fmt_ItoO (&spec->input, &v->print);
689 /* Recursively parses a FORTRAN-like format specification into
690 the linked list with head FIRST and tail TAIL. LEVEL is the
691 level of recursion, starting from 0. Returns the parsed
692 specification if successful, or a null pointer on failure. */
693 static struct fmt_list *
694 fixed_parse_fortran_internal (struct fixed_parsing_state *fx,
695 struct dls_var_spec **first,
696 struct dls_var_spec **last)
698 struct fmt_list *head = NULL;
699 struct fmt_list *tail = NULL;
701 lex_force_match ('(');
705 struct fmt_list *new = xmalloc (sizeof *new);
708 /* Append new to list. */
716 if (lex_integer_p ())
718 new->count = lex_integer ();
724 /* Parse format specifier. */
727 new->f.type = FMT_DESCEND;
728 new->down = fixed_parse_fortran_internal (fx, first, last);
729 if (new->down == NULL)
732 else if (lex_match ('/'))
733 new->f.type = FMT_NEWREC;
734 else if (!parse_format_specifier (&new->f, 1)
735 || !check_input_specifier (&new->f))
740 lex_force_match (')');
745 destroy_fmt_list (head, 0);
750 /* Parses a FORTRAN-like format specification into the linked
751 list with head FIRST and tail LAST. Returns nonzero if
754 fixed_parse_fortran (struct fixed_parsing_state *fx,
755 struct dls_var_spec **first, struct dls_var_spec **last)
757 struct fmt_list *list;
760 list = fixed_parse_fortran_internal (fx, first, last);
765 dump_fmt_list (fx, list, first, last, &name_idx);
766 destroy_fmt_list (list, 1);
767 if (name_idx < fx->name_cnt)
769 msg (SE, _("There aren't enough format specifications "
770 "to match the number of variable names given."));
777 /* Displays a table giving information on fixed-format variable
778 parsing on DATA LIST. */
779 /* FIXME: The `Columns' column should be divided into three columns,
780 one for the starting column, one for the dash, one for the ending
781 column; then right-justify the starting column and left-justify the
784 dump_fixed_table (const struct dls_var_spec *specs,
785 const struct file_handle *fh, int rec_cnt)
787 const struct dls_var_spec *spec;
791 for (i = 0, spec = specs; spec; spec = spec->next)
793 t = tab_create (4, i + 1, 0);
794 tab_columns (t, TAB_COL_DOWN, 1);
795 tab_headers (t, 0, 0, 1, 0);
796 tab_text (t, 0, 0, TAB_CENTER | TAT_TITLE, _("Variable"));
797 tab_text (t, 1, 0, TAB_CENTER | TAT_TITLE, _("Record"));
798 tab_text (t, 2, 0, TAB_CENTER | TAT_TITLE, _("Columns"));
799 tab_text (t, 3, 0, TAB_CENTER | TAT_TITLE, _("Format"));
800 tab_box (t, TAL_1, TAL_1, TAL_0, TAL_1, 0, 0, 3, i);
801 tab_hline (t, TAL_2, 0, 3, 1);
802 tab_dim (t, tab_natural_dimensions);
804 for (i = 1, spec = specs; spec; spec = spec->next, i++)
806 tab_text (t, 0, i, TAB_LEFT, spec->v->name);
807 tab_text (t, 1, i, TAT_PRINTF, "%d", spec->rec);
808 tab_text (t, 2, i, TAT_PRINTF, "%3d-%3d",
810 tab_text (t, 3, i, TAB_LEFT | TAT_FIX,
811 fmt_to_string (&spec->input));
815 tab_title (t, 1, ngettext ("Reading %d record from file %s.",
816 "Reading %d records from file %s.", rec_cnt),
817 rec_cnt, handle_get_filename (fh));
819 tab_title (t, 1, ngettext ("Reading %d record from the command file.",
820 "Reading %d records from the command file.",
826 /* Free-format parsing. */
828 /* Parses variable specifications for DATA LIST FREE and adds
829 them to the linked list with head FIRST and tail LAST.
830 Returns nonzero only if successful. */
832 parse_free (struct dls_var_spec **first, struct dls_var_spec **last)
837 struct fmt_spec input, output;
843 if (!parse_DATA_LIST_vars (&name, &name_cnt, PV_NONE))
847 if (!parse_format_specifier (&input, 0)
848 || !check_input_specifier (&input)
849 || !lex_force_match (')'))
851 for (i = 0; i < name_cnt; i++)
856 convert_fmt_ItoO (&input, &output);
864 output = get_format();
867 if (input.type == FMT_A || input.type == FMT_AHEX)
871 for (i = 0; i < name_cnt; i++)
873 struct dls_var_spec *spec;
876 v = dict_create_var (default_dict, name[i], width);
879 msg (SE, _("%s is a duplicate variable name."), name[i]);
882 v->print = v->write = output;
884 if (!case_source_is_complex (vfm_source))
887 spec = xmalloc (sizeof *spec);
891 strcpy (spec->name, name[i]);
892 append_var_spec (first, last, spec);
894 for (i = 0; i < name_cnt; i++)
900 lex_error (_("expecting end of command"));
904 /* Displays a table giving information on free-format variable parsing
907 dump_free_table (const struct data_list_pgm *dls,
908 const struct file_handle *fh)
914 struct dls_var_spec *spec;
915 for (i = 0, spec = dls->first; spec; spec = spec->next)
919 t = tab_create (2, i + 1, 0);
920 tab_columns (t, TAB_COL_DOWN, 1);
921 tab_headers (t, 0, 0, 1, 0);
922 tab_text (t, 0, 0, TAB_CENTER | TAT_TITLE, _("Variable"));
923 tab_text (t, 1, 0, TAB_CENTER | TAT_TITLE, _("Format"));
924 tab_box (t, TAL_1, TAL_1, TAL_0, TAL_1, 0, 0, 1, i);
925 tab_hline (t, TAL_2, 0, 1, 1);
926 tab_dim (t, tab_natural_dimensions);
929 struct dls_var_spec *spec;
931 for (i = 1, spec = dls->first; spec; spec = spec->next, i++)
933 tab_text (t, 0, i, TAB_LEFT, spec->v->name);
934 tab_text (t, 1, i, TAB_LEFT | TAT_FIX, fmt_to_string (&spec->input));
939 tab_title (t, 1, _("Reading free-form data from file %s."),
940 handle_get_filename (fh));
942 tab_title (t, 1, _("Reading free-form data from the command file."));
947 /* Input procedure. */
949 /* Extracts a field from the current position in the current
950 record. Fields can be unquoted or quoted with single- or
951 double-quote characters. *FIELD is set to the field content.
952 After parsing the field, sets the current position in the
953 record to just past the field and any trailing delimiter.
954 END_BLANK is used internally; it should be initialized by the
955 caller to 0 and left alone afterward. Returns 0 on failure or
956 a 1-based column number indicating the beginning of the field
959 cut_field (const struct data_list_pgm *dls, struct len_string *field,
962 struct len_string line;
966 if (dfm_eof (dls->reader))
968 if (dls->delim_cnt == 0)
969 dfm_expand_tabs (dls->reader);
970 dfm_get_record (dls->reader, &line);
972 cp = ls_c_str (&line);
973 if (dls->delim_cnt == 0)
975 /* Skip leading whitespace. */
976 while (cp < ls_end (&line) && isspace ((unsigned char) *cp))
978 if (cp >= ls_end (&line))
981 /* Handle actual data, whether quoted or unquoted. */
982 if (*cp == '\'' || *cp == '"')
986 field->string = ++cp;
987 while (cp < ls_end (&line) && *cp != quote)
989 field->length = cp - field->string;
990 if (cp < ls_end (&line))
993 msg (SW, _("Quoted string missing terminating `%c'."), quote);
998 while (cp < ls_end (&line)
999 && !isspace ((unsigned char) *cp) && *cp != ',')
1001 field->length = cp - field->string;
1004 /* Skip trailing whitespace and a single comma if present. */
1005 while (cp < ls_end (&line) && isspace ((unsigned char) *cp))
1007 if (cp < ls_end (&line) && *cp == ',')
1012 if (cp >= ls_end (&line))
1014 int column = dfm_column_start (dls->reader);
1015 /* A blank line or a line that ends in \t has a
1016 trailing blank field. */
1017 if (column == 1 || (column > 1 && cp[-1] == '\t'))
1019 if (*end_blank == 0)
1022 field->string = ls_end (&line);
1024 dfm_forward_record (dls->reader);
1039 while (cp < ls_end (&line)
1040 && memchr (dls->delims, *cp, dls->delim_cnt) == NULL)
1042 field->length = cp - field->string;
1043 if (cp < ls_end (&line))
1048 dfm_forward_columns (dls->reader, field->string - line.string);
1049 column_start = dfm_column_start (dls->reader);
1051 dfm_forward_columns (dls->reader, cp - field->string);
1053 return column_start;
1056 typedef int data_list_read_func (const struct data_list_pgm *, struct ccase *);
1057 static data_list_read_func read_from_data_list_fixed;
1058 static data_list_read_func read_from_data_list_free;
1059 static data_list_read_func read_from_data_list_list;
1061 /* Returns the proper function to read the kind of DATA LIST
1062 data specified by DLS. */
1063 static data_list_read_func *
1064 get_data_list_read_func (const struct data_list_pgm *dls)
1069 return read_from_data_list_fixed;
1072 return read_from_data_list_free;
1075 return read_from_data_list_list;
1083 /* Reads a case from the data file into C, parsing it according
1084 to fixed-format syntax rules in DLS. Returns -1 on success,
1085 -2 at end of file. */
1087 read_from_data_list_fixed (const struct data_list_pgm *dls,
1090 struct dls_var_spec *var_spec = dls->first;
1093 if (dfm_eof (dls->reader))
1095 for (i = 1; i <= dls->rec_cnt; i++)
1097 struct len_string line;
1099 if (dfm_eof (dls->reader))
1101 /* Note that this can't occur on the first record. */
1102 msg (SW, _("Partial case of %d of %d records discarded."),
1103 i - 1, dls->rec_cnt);
1106 dfm_expand_tabs (dls->reader);
1107 dfm_get_record (dls->reader, &line);
1109 for (; var_spec && i == var_spec->rec; var_spec = var_spec->next)
1113 data_in_finite_line (&di, ls_c_str (&line), ls_length (&line),
1114 var_spec->fc, var_spec->lc);
1115 di.v = case_data_rw (c, var_spec->fv);
1117 di.f1 = var_spec->fc;
1118 di.format = var_spec->input;
1123 dfm_forward_record (dls->reader);
1129 /* Reads a case from the data file into C, parsing it according
1130 to free-format syntax rules in DLS. Returns -1 on success,
1131 -2 at end of file. */
1133 read_from_data_list_free (const struct data_list_pgm *dls,
1136 struct dls_var_spec *var_spec;
1139 for (var_spec = dls->first; var_spec; var_spec = var_spec->next)
1141 struct len_string field;
1144 /* Cut out a field and read in a new record if necessary. */
1147 column = cut_field (dls, &field, &end_blank);
1151 if (!dfm_eof (dls->reader))
1152 dfm_forward_record (dls->reader);
1153 if (dfm_eof (dls->reader))
1155 if (var_spec != dls->first)
1156 msg (SW, _("Partial case discarded. The first variable "
1157 "missing was %s."), var_spec->name);
1165 di.s = ls_c_str (&field);
1166 di.e = ls_end (&field);
1167 di.v = case_data_rw (c, var_spec->fv);
1170 di.format = var_spec->input;
1177 /* Reads a case from the data file and parses it according to
1178 list-format syntax rules. Returns -1 on success, -2 at end of
1181 read_from_data_list_list (const struct data_list_pgm *dls,
1184 struct dls_var_spec *var_spec;
1187 if (dfm_eof (dls->reader))
1190 for (var_spec = dls->first; var_spec; var_spec = var_spec->next)
1192 struct len_string field;
1195 /* Cut out a field and check for end-of-line. */
1196 column = cut_field (dls, &field, &end_blank);
1199 if (get_undefined ())
1200 msg (SW, _("Missing value(s) for all variables from %s onward. "
1201 "These will be filled with the system-missing value "
1202 "or blanks, as appropriate."),
1204 for (; var_spec; var_spec = var_spec->next)
1206 int width = get_format_var_width (&var_spec->input);
1208 case_data_rw (c, var_spec->fv)->f = SYSMIS;
1210 memset (case_data_rw (c, var_spec->fv)->s, ' ', width);
1218 di.s = ls_c_str (&field);
1219 di.e = ls_end (&field);
1220 di.v = case_data_rw (c, var_spec->fv);
1223 di.format = var_spec->input;
1228 dfm_forward_record (dls->reader);
1232 /* Destroys SPEC. */
1234 destroy_dls_var_spec (struct dls_var_spec *spec)
1236 struct dls_var_spec *next;
1238 while (spec != NULL)
1246 /* Destroys DATA LIST transformation PGM. */
1248 data_list_trns_free (struct trns_header *pgm)
1250 struct data_list_pgm *dls = (struct data_list_pgm *) pgm;
1252 destroy_dls_var_spec (dls->first);
1253 dfm_close_reader (dls->reader);
1257 /* Handle DATA LIST transformation T, parsing data into C. */
1259 data_list_trns_proc (struct trns_header *t, struct ccase *c,
1260 int case_num UNUSED)
1262 struct data_list_pgm *dls = (struct data_list_pgm *) t;
1263 data_list_read_func *read_func;
1266 dfm_push (dls->reader);
1268 read_func = get_data_list_read_func (dls);
1269 retval = read_func (dls, c);
1271 /* Handle end of file. */
1274 /* If we already encountered end of file then this is an
1278 msg (SE, _("Attempt to read past end of file."));
1280 dfm_pop (dls->reader);
1284 /* Otherwise simply note it. */
1290 /* If there was an END subcommand handle it. */
1291 if (dls->end != NULL)
1295 case_data_rw (c, dls->end->fv)->f = 1.0;
1299 case_data_rw (c, dls->end->fv)->f = 0.0;
1302 dfm_pop (dls->reader);
1307 /* Reads all the records from the data file and passes them to
1310 data_list_source_read (struct case_source *source,
1312 write_case_func *write_case, write_case_data wc_data)
1314 struct data_list_pgm *dls = source->aux;
1315 data_list_read_func *read_func = get_data_list_read_func (dls);
1317 dfm_push (dls->reader);
1318 while (read_func (dls, c) != -2)
1319 if (!write_case (wc_data))
1321 dfm_pop (dls->reader);
1324 /* Destroys the source's internal data. */
1326 data_list_source_destroy (struct case_source *source)
1328 data_list_trns_free (source->aux);
1331 const struct case_source_class data_list_source_class =
1335 data_list_source_read,
1336 data_list_source_destroy,
1339 /* REPEATING DATA. */
1341 /* Represents a number or a variable. */
1342 struct rpd_num_or_var
1344 int num; /* Value, or 0. */
1345 struct variable *var; /* Variable, if number==0. */
1348 /* REPEATING DATA private data structure. */
1349 struct repeating_data_trns
1351 struct trns_header h;
1352 struct dls_var_spec *first, *last; /* Variable parsing specifications. */
1353 struct dfm_reader *reader; /* Input file, never NULL. */
1355 struct rpd_num_or_var starts_beg; /* STARTS=, before the dash. */
1356 struct rpd_num_or_var starts_end; /* STARTS=, after the dash. */
1357 struct rpd_num_or_var occurs; /* OCCURS= subcommand. */
1358 struct rpd_num_or_var length; /* LENGTH= subcommand. */
1359 struct rpd_num_or_var cont_beg; /* CONTINUED=, before the dash. */
1360 struct rpd_num_or_var cont_end; /* CONTINUED=, after the dash. */
1362 /* ID subcommand. */
1363 int id_beg, id_end; /* Beginning & end columns. */
1364 struct variable *id_var; /* DATA LIST variable. */
1365 struct fmt_spec id_spec; /* Input format spec. */
1366 union value *id_value; /* ID value. */
1368 write_case_func *write_case;
1369 write_case_data wc_data;
1372 static trns_free_func repeating_data_trns_free;
1373 static int parse_num_or_var (struct rpd_num_or_var *, const char *);
1374 static int parse_repeating_data (struct dls_var_spec **,
1375 struct dls_var_spec **);
1376 static void find_variable_input_spec (struct variable *v,
1377 struct fmt_spec *spec);
1379 /* Parses the REPEATING DATA command. */
1381 cmd_repeating_data (void)
1383 struct repeating_data_trns *rpd;
1384 int table = 1; /* Print table? */
1385 unsigned seen = 0; /* Mark subcommands as already seen. */
1386 struct file_handle *const fh = default_handle;
1388 assert (case_source_is_complex (vfm_source));
1390 rpd = xmalloc (sizeof *rpd);
1391 rpd->reader = dfm_open_reader (default_handle);
1392 rpd->first = rpd->last = NULL;
1393 rpd->starts_beg.num = 0;
1394 rpd->starts_beg.var = NULL;
1395 rpd->starts_end = rpd->occurs = rpd->length = rpd->cont_beg
1396 = rpd->cont_end = rpd->starts_beg;
1397 rpd->id_beg = rpd->id_end = 0;
1399 rpd->id_value = NULL;
1405 if (lex_match_id ("FILE"))
1407 struct file_handle *file;
1414 msg (SE, _("REPEATING DATA must use the same file as its "
1415 "corresponding DATA LIST or FILE TYPE."));
1419 else if (lex_match_id ("STARTS"))
1424 msg (SE, _("%s subcommand given multiple times."),"STARTS");
1429 if (!parse_num_or_var (&rpd->starts_beg, "STARTS beginning column"))
1432 lex_negative_to_dash ();
1433 if (lex_match ('-'))
1435 if (!parse_num_or_var (&rpd->starts_end, "STARTS ending column"))
1438 /* Otherwise, rpd->starts_end is left uninitialized.
1439 This is okay. We will initialize it later from the
1440 record length of the file. We can't do this now
1441 because we can't be sure that the user has specified
1442 the file handle yet. */
1445 if (rpd->starts_beg.num != 0 && rpd->starts_end.num != 0
1446 && rpd->starts_beg.num > rpd->starts_end.num)
1448 msg (SE, _("STARTS beginning column (%d) exceeds "
1449 "STARTS ending column (%d)."),
1450 rpd->starts_beg.num, rpd->starts_end.num);
1454 else if (lex_match_id ("OCCURS"))
1459 msg (SE, _("%s subcommand given multiple times."),"OCCURS");
1464 if (!parse_num_or_var (&rpd->occurs, "OCCURS"))
1467 else if (lex_match_id ("LENGTH"))
1472 msg (SE, _("%s subcommand given multiple times."),"LENGTH");
1477 if (!parse_num_or_var (&rpd->length, "LENGTH"))
1480 else if (lex_match_id ("CONTINUED"))
1485 msg (SE, _("%s subcommand given multiple times."),"CONTINUED");
1490 if (!lex_match ('/'))
1492 if (!parse_num_or_var (&rpd->cont_beg, "CONTINUED beginning column"))
1495 lex_negative_to_dash ();
1497 && !parse_num_or_var (&rpd->cont_end,
1498 "CONTINUED ending column"))
1501 if (rpd->cont_beg.num != 0 && rpd->cont_end.num != 0
1502 && rpd->cont_beg.num > rpd->cont_end.num)
1504 msg (SE, _("CONTINUED beginning column (%d) exceeds "
1505 "CONTINUED ending column (%d)."),
1506 rpd->cont_beg.num, rpd->cont_end.num);
1511 rpd->cont_beg.num = 1;
1513 else if (lex_match_id ("ID"))
1518 msg (SE, _("%s subcommand given multiple times."),"ID");
1523 if (!lex_force_int ())
1525 if (lex_integer () < 1)
1527 msg (SE, _("ID beginning column (%ld) must be positive."),
1531 rpd->id_beg = lex_integer ();
1534 lex_negative_to_dash ();
1536 if (lex_match ('-'))
1538 if (!lex_force_int ())
1540 if (lex_integer () < 1)
1542 msg (SE, _("ID ending column (%ld) must be positive."),
1546 if (lex_integer () < rpd->id_end)
1548 msg (SE, _("ID ending column (%ld) cannot be less than "
1549 "ID beginning column (%d)."),
1550 lex_integer (), rpd->id_beg);
1554 rpd->id_end = lex_integer ();
1557 else rpd->id_end = rpd->id_beg;
1559 if (!lex_force_match ('='))
1561 rpd->id_var = parse_variable ();
1562 if (rpd->id_var == NULL)
1565 find_variable_input_spec (rpd->id_var, &rpd->id_spec);
1566 rpd->id_value = xmalloc (sizeof *rpd->id_value * rpd->id_var->nv);
1568 else if (lex_match_id ("TABLE"))
1570 else if (lex_match_id ("NOTABLE"))
1572 else if (lex_match_id ("DATA"))
1580 if (!lex_force_match ('/'))
1584 /* Comes here when DATA specification encountered. */
1585 if ((seen & (1 | 2)) != (1 | 2))
1587 if ((seen & 1) == 0)
1588 msg (SE, _("Missing required specification STARTS."));
1589 if ((seen & 2) == 0)
1590 msg (SE, _("Missing required specification OCCURS."));
1594 /* Enforce ID restriction. */
1595 if ((seen & 16) && !(seen & 8))
1597 msg (SE, _("ID specified without CONTINUED."));
1601 /* Calculate starts_end, cont_end if necessary. */
1602 if (rpd->starts_end.num == 0 && rpd->starts_end.var == NULL)
1603 rpd->starts_end.num = handle_get_record_width (fh);
1604 if (rpd->cont_end.num == 0 && rpd->starts_end.var == NULL)
1605 rpd->cont_end.num = handle_get_record_width (fh);
1607 /* Calculate length if possible. */
1608 if ((seen & 4) == 0)
1610 struct dls_var_spec *iter;
1612 for (iter = rpd->first; iter; iter = iter->next)
1614 if (iter->lc > rpd->length.num)
1615 rpd->length.num = iter->lc;
1617 assert (rpd->length.num != 0);
1621 if (!parse_repeating_data (&rpd->first, &rpd->last))
1625 dump_fixed_table (rpd->first, fh, rpd->last->rec);
1628 struct repeating_data_trns *new_trns;
1630 rpd->h.proc = repeating_data_trns_proc;
1631 rpd->h.free = repeating_data_trns_free;
1633 new_trns = xmalloc (sizeof *new_trns);
1634 memcpy (new_trns, &rpd, sizeof *new_trns);
1635 add_transformation ((struct trns_header *) new_trns);
1638 return lex_end_of_command ();
1641 destroy_dls_var_spec (rpd->first);
1642 free (rpd->id_value);
1646 /* Finds the input format specification for variable V and puts
1647 it in SPEC. Because of the way that DATA LIST is structured,
1648 this is nontrivial. */
1650 find_variable_input_spec (struct variable *v, struct fmt_spec *spec)
1654 for (i = 0; i < n_trns; i++)
1656 struct data_list_pgm *pgm = (struct data_list_pgm *) t_trns[i];
1658 if (pgm->h.proc == data_list_trns_proc)
1660 struct dls_var_spec *iter;
1662 for (iter = pgm->first; iter; iter = iter->next)
1665 *spec = iter->input;
1674 /* Parses a number or a variable name from the syntax file and puts
1675 the results in VALUE. Ensures that the number is at least 1; else
1676 emits an error based on MESSAGE. Returns nonzero only if
1679 parse_num_or_var (struct rpd_num_or_var *value, const char *message)
1684 value->var = parse_variable ();
1685 if (value->var == NULL)
1687 if (value->var->type == ALPHA)
1689 msg (SE, _("String variable not allowed here."));
1693 else if (lex_integer_p ())
1695 value->num = lex_integer ();
1699 msg (SE, _("%s (%d) must be at least 1."), message, value->num);
1705 msg (SE, _("Variable or integer expected for %s."), message);
1711 /* Parses data specifications for repeating data groups, adding
1712 them to the linked list with head FIRST and tail LAST.
1713 Returns nonzero only if successful. */
1715 parse_repeating_data (struct dls_var_spec **first, struct dls_var_spec **last)
1717 struct fixed_parsing_state fx;
1723 while (token != '.')
1725 if (!parse_DATA_LIST_vars (&fx.name, &fx.name_cnt, PV_NONE))
1730 if (!fixed_parse_compatible (&fx, first, last))
1733 else if (token == '(')
1735 if (!fixed_parse_fortran (&fx, first, last))
1740 msg (SE, _("SPSS-like or FORTRAN-like format "
1741 "specification expected after variable names."));
1745 for (i = 0; i < fx.name_cnt; i++)
1751 lex_error (_("expecting end of command"));
1758 for (i = 0; i < fx.name_cnt; i++)
1764 /* Obtains the real value for rpd_num_or_var N in case C and returns
1765 it. The valid range is nonnegative numbers, but numbers outside
1766 this range can be returned and should be handled by the caller as
1769 realize_value (struct rpd_num_or_var *n, struct ccase *c)
1774 assert (n->num == 0);
1777 double v = case_num (c, n->var->fv);
1779 if (v == SYSMIS || v <= INT_MIN || v >= INT_MAX)
1788 /* Parameter record passed to rpd_parse_record(). */
1789 struct rpd_parse_info
1791 struct repeating_data_trns *trns; /* REPEATING DATA transformation. */
1792 const char *line; /* Line being parsed. */
1793 size_t len; /* Line length. */
1794 int beg, end; /* First and last column of first occurrence. */
1795 int ofs; /* Column offset between repeated occurrences. */
1796 struct ccase *c; /* Case to fill in. */
1797 int verify_id; /* Zero to initialize ID, nonzero to verify it. */
1798 int max_occurs; /* Max number of occurrences to parse. */
1801 /* Parses one record of repeated data and outputs corresponding
1802 cases. Returns number of occurrences parsed up to the
1803 maximum specified in INFO. */
1805 rpd_parse_record (const struct rpd_parse_info *info)
1807 struct repeating_data_trns *t = info->trns;
1808 int cur = info->beg;
1811 /* Handle record ID values. */
1814 union value id_temp[MAX_ELEMS_PER_VALUE];
1816 /* Parse record ID into V. */
1820 data_in_finite_line (&di, info->line, info->len, t->id_beg, t->id_end);
1821 di.v = info->verify_id ? id_temp : t->id_value;
1824 di.format = t->id_spec;
1831 && compare_values (id_temp, t->id_value, t->id_var->width) != 0)
1833 char expected_str [MAX_FORMATTED_LEN + 1];
1834 char actual_str [MAX_FORMATTED_LEN + 1];
1836 data_out (expected_str, &t->id_var->print, t->id_value);
1837 expected_str[t->id_var->print.w] = '\0';
1839 data_out (actual_str, &t->id_var->print, id_temp);
1840 actual_str[t->id_var->print.w] = '\0';
1843 _("Encountered mismatched record ID \"%s\" expecting \"%s\"."),
1844 actual_str, expected_str);
1850 /* Iterate over the set of expected occurrences and record each of
1851 them as a separate case. FIXME: We need to execute any
1852 transformations that follow the current one. */
1856 for (occurrences = 0; occurrences < info->max_occurs; )
1858 if (cur + info->ofs > info->end + 1)
1863 struct dls_var_spec *var_spec = t->first;
1865 for (; var_spec; var_spec = var_spec->next)
1867 int fc = var_spec->fc - 1 + cur;
1868 int lc = var_spec->lc - 1 + cur;
1870 if (fc > info->len && !warned && var_spec->input.type != FMT_A)
1875 _("Variable %s starting in column %d extends "
1876 "beyond physical record length of %d."),
1877 var_spec->v->name, fc, info->len);
1883 data_in_finite_line (&di, info->line, info->len, fc, lc);
1884 di.v = case_data_rw (info->c, var_spec->fv);
1887 di.format = var_spec->input;
1897 if (!t->write_case (t->wc_data))
1905 /* Reads one set of repetitions of the elements in the REPEATING
1906 DATA structure. Returns -1 on success, -2 on end of file or
1909 repeating_data_trns_proc (struct trns_header *trns, struct ccase *c,
1910 int case_num UNUSED)
1912 struct repeating_data_trns *t = (struct repeating_data_trns *) trns;
1914 struct len_string line; /* Current record. */
1916 int starts_beg; /* Starting column. */
1917 int starts_end; /* Ending column. */
1918 int occurs; /* Number of repetitions. */
1919 int length; /* Length of each occurrence. */
1920 int cont_beg; /* Starting column for continuation lines. */
1921 int cont_end; /* Ending column for continuation lines. */
1923 int occurs_left; /* Number of occurrences remaining. */
1925 int code; /* Return value from rpd_parse_record(). */
1927 int skip_first_record = 0;
1929 dfm_push (t->reader);
1931 /* Read the current record. */
1932 dfm_reread_record (t->reader, 1);
1933 dfm_expand_tabs (t->reader);
1934 if (dfm_eof (t->reader))
1936 dfm_get_record (t->reader, &line);
1937 dfm_forward_record (t->reader);
1939 /* Calculate occurs, length. */
1940 occurs_left = occurs = realize_value (&t->occurs, c);
1943 tmsg (SE, RPD_ERR, _("Invalid value %d for OCCURS."), occurs);
1946 starts_beg = realize_value (&t->starts_beg, c);
1947 if (starts_beg <= 0)
1949 tmsg (SE, RPD_ERR, _("Beginning column for STARTS (%d) must be "
1954 starts_end = realize_value (&t->starts_end, c);
1955 if (starts_end < starts_beg)
1957 tmsg (SE, RPD_ERR, _("Ending column for STARTS (%d) is less than "
1958 "beginning column (%d)."),
1959 starts_end, starts_beg);
1960 skip_first_record = 1;
1962 length = realize_value (&t->length, c);
1965 tmsg (SE, RPD_ERR, _("Invalid value %d for LENGTH."), length);
1967 occurs = occurs_left = 1;
1969 cont_beg = realize_value (&t->cont_beg, c);
1972 tmsg (SE, RPD_ERR, _("Beginning column for CONTINUED (%d) must be "
1977 cont_end = realize_value (&t->cont_end, c);
1978 if (cont_end < cont_beg)
1980 tmsg (SE, RPD_ERR, _("Ending column for CONTINUED (%d) is less than "
1981 "beginning column (%d)."),
1982 cont_end, cont_beg);
1986 /* Parse the first record. */
1987 if (!skip_first_record)
1989 struct rpd_parse_info info;
1991 info.line = ls_c_str (&line);
1992 info.len = ls_length (&line);
1993 info.beg = starts_beg;
1994 info.end = starts_end;
1998 info.max_occurs = occurs_left;
1999 code = rpd_parse_record (&info);
2002 occurs_left -= code;
2004 else if (cont_beg == 0)
2007 /* Make sure, if some occurrences are left, that we have
2008 continuation records. */
2009 if (occurs_left > 0 && cont_beg == 0)
2012 _("Number of repetitions specified on OCCURS (%d) "
2013 "exceed number of repetitions available in "
2014 "space on STARTS (%d), and CONTINUED not specified."),
2015 occurs, (starts_end - starts_beg + 1) / length);
2019 /* Go on to additional records. */
2020 while (occurs_left != 0)
2022 struct rpd_parse_info info;
2024 assert (occurs_left >= 0);
2026 /* Read in another record. */
2027 if (dfm_eof (t->reader))
2030 _("Unexpected end of file with %d repetitions "
2031 "remaining out of %d."),
2032 occurs_left, occurs);
2035 dfm_expand_tabs (t->reader);
2036 dfm_get_record (t->reader, &line);
2037 dfm_forward_record (t->reader);
2039 /* Parse this record. */
2041 info.line = ls_c_str (&line);
2042 info.len = ls_length (&line);
2043 info.beg = cont_beg;
2044 info.end = cont_end;
2048 info.max_occurs = occurs_left;
2049 code = rpd_parse_record (&info);;
2052 occurs_left -= code;
2055 dfm_pop (t->reader);
2057 /* FIXME: This is a kluge until we've implemented multiplexing of
2062 /* Frees a REPEATING DATA transformation. */
2064 repeating_data_trns_free (struct trns_header *rpd_)
2066 struct repeating_data_trns *rpd = (struct repeating_data_trns *) rpd_;
2068 destroy_dls_var_spec (rpd->first);
2069 dfm_close_reader (rpd->reader);
2070 free (rpd->id_value);
2073 /* Lets repeating_data_trns_proc() know how to write the cases
2074 that it composes. Not elegant. */
2076 repeating_data_set_write_case (struct trns_header *trns,
2077 write_case_func *write_case,
2078 write_case_data wc_data)
2080 struct repeating_data_trns *t = (struct repeating_data_trns *) trns;
2082 assert (trns->proc == repeating_data_trns_proc);
2083 t->write_case = write_case;
2084 t->wc_data = wc_data;