1 /* PSPP - computes sample statistics.
2 Copyright (C) 1997-9, 2000 Free Software Foundation, Inc.
3 Written by Ben Pfaff <blp@gnu.org>.
5 This program is free software; you can redistribute it and/or
6 modify it under the terms of the GNU General Public License as
7 published by the Free Software Foundation; either version 2 of the
8 License, or (at your option) any later version.
10 This program is distributed in the hope that it will be useful, but
11 WITHOUT ANY WARRANTY; without even the implied warranty of
12 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
13 General Public License for more details.
15 You should have received a copy of the GNU General Public License
16 along with this program; if not, write to the Free Software
17 Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
21 #include "data-list.h"
31 #include "debug-print.h"
33 #include "dictionary.h"
35 #include "file-handle.h"
46 #define _(msgid) gettext (msgid)
48 /* Utility function. */
50 /* FIXME: Either REPEATING DATA must be the last transformation, or we
51 must multiplex the transformations that follow (i.e., perform them
52 for every case that we produce from a repetition instance).
53 Currently we do neither. We should do one or the other. */
55 /* Describes how to parse one variable. */
58 struct dls_var_spec *next; /* Next specification in list. */
60 /* Both free and fixed formats. */
61 struct fmt_spec input; /* Input format of this field. */
62 struct variable *v; /* Associated variable. Used only in
63 parsing. Not safe later. */
64 int fv; /* First value in case. */
66 /* Fixed format only. */
67 int rec; /* Record number (1-based). */
68 int fc, lc; /* Column numbers in record. */
70 /* Free format only. */
71 char name[LONG_NAME_LEN + 1]; /* Name of variable. */
74 /* Constants for DATA LIST type. */
75 /* Must match table in cmd_data_list(). */
83 /* DATA LIST private data structure. */
88 struct dls_var_spec *first, *last; /* Variable parsing specifications. */
89 struct dfm_reader *reader; /* Data file reader. */
91 int type; /* A DLS_* constant. */
92 struct variable *end; /* Variable specified on END subcommand. */
93 int eof; /* End of file encountered. */
94 int rec_cnt; /* Number of records. */
95 size_t case_size; /* Case size in bytes. */
96 char *delims; /* Delimiters if any; not null-terminated. */
97 size_t delim_cnt; /* Number of delimiter, or 0 for spaces. */
100 static int parse_fixed (struct data_list_pgm *);
101 static int parse_free (struct dls_var_spec **, struct dls_var_spec **);
102 static void dump_fixed_table (const struct dls_var_spec *,
103 const struct file_handle *, int rec_cnt);
104 static void dump_free_table (const struct data_list_pgm *,
105 const struct file_handle *);
106 static void destroy_dls_var_spec (struct dls_var_spec *);
107 static trns_free_func data_list_trns_free;
108 static trns_proc_func data_list_trns_proc;
110 /* Message title for REPEATING DATA. */
111 #define RPD_ERR "REPEATING DATA: "
116 struct data_list_pgm *dls; /* DATA LIST program under construction. */
117 int table = -1; /* Print table if nonzero, -1=undecided. */
118 struct file_handle *fh = NULL; /* File handle of source, NULL=inline file. */
120 if (!case_source_is_complex (vfm_source))
121 discard_variables ();
123 dls = xmalloc (sizeof *dls);
131 dls->first = dls->last = NULL;
135 if (lex_match_id ("FILE"))
141 if (case_source_is_class (vfm_source, &file_type_source_class)
142 && fh != default_handle)
144 msg (SE, _("DATA LIST may not use a different file from "
145 "that specified on its surrounding FILE TYPE."));
149 else if (lex_match_id ("RECORDS"))
153 if (!lex_force_int ())
155 dls->rec_cnt = lex_integer ();
159 else if (lex_match_id ("END"))
163 msg (SE, _("The END subcommand may only be specified once."));
168 if (!lex_force_id ())
170 dls->end = dict_lookup_var (default_dict, tokid);
172 dls->end = dict_create_var_assert (default_dict, tokid, 0);
175 else if (token == T_ID)
177 if (lex_match_id ("NOTABLE"))
179 else if (lex_match_id ("TABLE"))
184 if (lex_match_id ("FIXED"))
186 else if (lex_match_id ("FREE"))
188 else if (lex_match_id ("LIST"))
198 msg (SE, _("Only one of FIXED, FREE, or LIST may "
204 if ((dls->type == DLS_FREE || dls->type == DLS_LIST)
207 while (!lex_match (')'))
211 if (lex_match_id ("TAB"))
213 else if (token == T_STRING && tokstr.length == 1)
215 delim = tokstr.string[0];
224 dls->delims = xrealloc (dls->delims, dls->delim_cnt + 1);
225 dls->delims[dls->delim_cnt++] = delim;
239 dls->case_size = dict_get_case_size (default_dict);
243 dls->type = DLS_FIXED;
247 if (dls->type == DLS_FREE)
253 if (dls->type == DLS_FIXED)
255 if (!parse_fixed (dls))
258 dump_fixed_table (dls->first, fh, dls->rec_cnt);
262 if (!parse_free (&dls->first, &dls->last))
265 dump_free_table (dls, fh);
268 dls->reader = dfm_open_reader (fh);
269 if (dls->reader == NULL)
272 if (vfm_source != NULL)
274 dls->h.proc = data_list_trns_proc;
275 dls->h.free = data_list_trns_free;
276 add_transformation (&dls->h);
279 vfm_source = create_case_source (&data_list_source_class, dls);
284 destroy_dls_var_spec (dls->first);
290 /* Adds SPEC to the linked list with head at FIRST and tail at
293 append_var_spec (struct dls_var_spec **first, struct dls_var_spec **last,
294 struct dls_var_spec *spec)
301 (*last)->next = spec;
305 /* Fixed-format parsing. */
307 /* Used for chaining together fortran-like format specifiers. */
310 struct fmt_list *next;
313 struct fmt_list *down;
316 /* State of parsing DATA LIST. */
317 struct fixed_parsing_state
319 char **name; /* Variable names. */
320 size_t name_cnt; /* Number of names. */
322 int recno; /* Index of current record. */
323 int sc; /* 1-based column number of starting column for
324 next field to output. */
327 static int fixed_parse_compatible (struct fixed_parsing_state *,
328 struct dls_var_spec **,
329 struct dls_var_spec **);
330 static int fixed_parse_fortran (struct fixed_parsing_state *,
331 struct dls_var_spec **,
332 struct dls_var_spec **);
334 /* Parses all the variable specifications for DATA LIST FIXED,
335 storing them into DLS. Returns nonzero if successful. */
337 parse_fixed (struct data_list_pgm *dls)
339 struct fixed_parsing_state fx;
347 while (lex_match ('/'))
350 if (lex_is_integer ())
352 if (lex_integer () < fx.recno)
354 msg (SE, _("The record number specified, %ld, is "
355 "before the previous record, %d. Data "
356 "fields must be listed in order of "
357 "increasing record number."),
358 lex_integer (), fx.recno - 1);
362 fx.recno = lex_integer ();
368 if (!parse_DATA_LIST_vars (&fx.name, &fx.name_cnt, PV_NONE))
371 if (lex_is_number ())
373 if (!fixed_parse_compatible (&fx, &dls->first, &dls->last))
376 else if (token == '(')
378 if (!fixed_parse_fortran (&fx, &dls->first, &dls->last))
383 msg (SE, _("SPSS-like or FORTRAN-like format "
384 "specification expected after variable names."));
388 for (i = 0; i < fx.name_cnt; i++)
392 if (dls->first == NULL)
394 msg (SE, _("At least one variable must be specified."));
397 if (dls->rec_cnt && dls->last->rec > dls->rec_cnt)
399 msg (SE, _("Variables are specified on records that "
400 "should not exist according to RECORDS subcommand."));
403 else if (!dls->rec_cnt)
404 dls->rec_cnt = dls->last->rec;
405 return lex_end_of_command () == CMD_SUCCESS;
408 for (i = 0; i < fx.name_cnt; i++)
414 /* Parses a variable specification in the form 1-10 (A) based on
415 FX and adds specifications to the linked list with head at
416 FIRST and tail at LAST. */
418 fixed_parse_compatible (struct fixed_parsing_state *fx,
419 struct dls_var_spec **first, struct dls_var_spec **last)
421 struct fmt_spec input;
427 if (!lex_force_int ())
432 msg (SE, _("Column positions for fields must be positive."));
438 lex_negative_to_dash ();
441 if (!lex_force_int ())
446 msg (SE, _("Column positions for fields must be positive."));
451 msg (SE, _("The ending column for a field must be "
452 "greater than the starting column."));
461 /* Divide columns evenly. */
462 input.w = (lc - fc + 1) / fx->name_cnt;
463 if ((lc - fc + 1) % fx->name_cnt)
465 msg (SE, _("The %d columns %d-%d "
466 "can't be evenly divided into %d fields."),
467 lc - fc + 1, fc, lc, fx->name_cnt);
471 /* Format specifier. */
474 struct fmt_desc *fdp;
480 input.type = parse_format_specifier_name (&cp, 0);
481 if (input.type == -1)
485 msg (SE, _("A format specifier on this line "
486 "has extra characters on the end."));
496 if (lex_is_integer ())
498 if (lex_integer () < 1)
500 msg (SE, _("The value for number of decimal places "
501 "must be at least 1."));
505 input.d = lex_integer ();
511 fdp = &formats[input.type];
512 if (fdp->n_args < 2 && input.d)
514 msg (SE, _("Input format %s doesn't accept decimal places."),
522 if (!lex_force_match (')'))
530 if (!check_input_specifier (&input, 1))
533 /* Start column for next specification. */
536 /* Width of variables to create. */
537 if (input.type == FMT_A || input.type == FMT_AHEX)
542 /* Create variables and var specs. */
543 for (i = 0; i < fx->name_cnt; i++)
545 struct dls_var_spec *spec;
548 v = dict_create_var (default_dict, fx->name[i], width);
551 convert_fmt_ItoO (&input, &v->print);
553 if (!case_source_is_complex (vfm_source))
558 v = dict_lookup_var_assert (default_dict, fx->name[i]);
559 if (vfm_source == NULL)
561 msg (SE, _("%s is a duplicate variable name."), fx->name[i]);
564 if ((width != 0) != (v->width != 0))
566 msg (SE, _("There is already a variable %s of a "
571 if (width != 0 && width != v->width)
573 msg (SE, _("There is already a string variable %s of a "
574 "different width."), fx->name[i]);
579 spec = xmalloc (sizeof *spec);
583 spec->rec = fx->recno;
584 spec->fc = fc + input.w * i;
585 spec->lc = spec->fc + input.w - 1;
586 append_var_spec (first, last, spec);
591 /* Destroy format list F and, if RECURSE is nonzero, all its
594 destroy_fmt_list (struct fmt_list *f, int recurse)
596 struct fmt_list *next;
601 if (recurse && f->f.type == FMT_DESCEND)
602 destroy_fmt_list (f->down, 1);
607 /* Takes a hierarchically structured fmt_list F as constructed by
608 fixed_parse_fortran(), and flattens it, adding the variable
609 specifications to the linked list with head FIRST and tail
610 LAST. NAME_IDX is used to take values from the list of names
611 in FX; it should initially point to a value of 0. */
613 dump_fmt_list (struct fixed_parsing_state *fx, struct fmt_list *f,
614 struct dls_var_spec **first, struct dls_var_spec **last,
619 for (; f; f = f->next)
620 if (f->f.type == FMT_X)
622 else if (f->f.type == FMT_T)
624 else if (f->f.type == FMT_NEWREC)
626 fx->recno += f->count;
630 for (i = 0; i < f->count; i++)
631 if (f->f.type == FMT_DESCEND)
633 if (!dump_fmt_list (fx, f->down, first, last, name_idx))
638 struct dls_var_spec *spec;
642 if (formats[f->f.type].cat & FCAT_STRING)
646 if (*name_idx >= fx->name_cnt)
648 msg (SE, _("The number of format "
649 "specifications exceeds the given number of "
654 v = dict_create_var (default_dict, fx->name[(*name_idx)++], width);
657 msg (SE, _("%s is a duplicate variable name."), fx->name[i]);
661 if (!case_source_is_complex (vfm_source))
664 spec = xmalloc (sizeof *spec);
668 spec->rec = fx->recno;
670 spec->lc = fx->sc + f->f.w - 1;
671 append_var_spec (first, last, spec);
673 convert_fmt_ItoO (&spec->input, &v->print);
681 /* Recursively parses a FORTRAN-like format specification into
682 the linked list with head FIRST and tail TAIL. LEVEL is the
683 level of recursion, starting from 0. Returns the parsed
684 specification if successful, or a null pointer on failure. */
685 static struct fmt_list *
686 fixed_parse_fortran_internal (struct fixed_parsing_state *fx,
687 struct dls_var_spec **first,
688 struct dls_var_spec **last)
690 struct fmt_list *head = NULL;
691 struct fmt_list *tail = NULL;
693 lex_force_match ('(');
697 struct fmt_list *new = xmalloc (sizeof *new);
700 /* Append new to list. */
708 if (lex_is_integer ())
710 new->count = lex_integer ();
716 /* Parse format specifier. */
719 new->f.type = FMT_DESCEND;
720 new->down = fixed_parse_fortran_internal (fx, first, last);
721 if (new->down == NULL)
724 else if (lex_match ('/'))
725 new->f.type = FMT_NEWREC;
726 else if (!parse_format_specifier (&new->f, FMTP_ALLOW_XT)
727 || !check_input_specifier (&new->f, 1))
732 lex_force_match (')');
737 destroy_fmt_list (head, 0);
742 /* Parses a FORTRAN-like format specification into the linked
743 list with head FIRST and tail LAST. Returns nonzero if
746 fixed_parse_fortran (struct fixed_parsing_state *fx,
747 struct dls_var_spec **first, struct dls_var_spec **last)
749 struct fmt_list *list;
752 list = fixed_parse_fortran_internal (fx, first, last);
757 dump_fmt_list (fx, list, first, last, &name_idx);
758 destroy_fmt_list (list, 1);
759 if (name_idx < fx->name_cnt)
761 msg (SE, _("There aren't enough format specifications "
762 "to match the number of variable names given."));
769 /* Displays a table giving information on fixed-format variable
770 parsing on DATA LIST. */
771 /* FIXME: The `Columns' column should be divided into three columns,
772 one for the starting column, one for the dash, one for the ending
773 column; then right-justify the starting column and left-justify the
776 dump_fixed_table (const struct dls_var_spec *specs,
777 const struct file_handle *fh, int rec_cnt)
779 const struct dls_var_spec *spec;
783 for (i = 0, spec = specs; spec; spec = spec->next)
785 t = tab_create (4, i + 1, 0);
786 tab_columns (t, TAB_COL_DOWN, 1);
787 tab_headers (t, 0, 0, 1, 0);
788 tab_text (t, 0, 0, TAB_CENTER | TAT_TITLE, _("Variable"));
789 tab_text (t, 1, 0, TAB_CENTER | TAT_TITLE, _("Record"));
790 tab_text (t, 2, 0, TAB_CENTER | TAT_TITLE, _("Columns"));
791 tab_text (t, 3, 0, TAB_CENTER | TAT_TITLE, _("Format"));
792 tab_box (t, TAL_1, TAL_1, TAL_0, TAL_1, 0, 0, 3, i);
793 tab_hline (t, TAL_2, 0, 3, 1);
794 tab_dim (t, tab_natural_dimensions);
796 for (i = 1, spec = specs; spec; spec = spec->next, i++)
798 tab_text (t, 0, i, TAB_LEFT, spec->v->name);
799 tab_text (t, 1, i, TAT_PRINTF, "%d", spec->rec);
800 tab_text (t, 2, i, TAT_PRINTF, "%3d-%3d",
802 tab_text (t, 3, i, TAB_LEFT | TAT_FIX,
803 fmt_to_string (&spec->input));
807 tab_title (t, 1, ngettext ("Reading %d record from file %s.",
808 "Reading %d records from file %s.", rec_cnt),
809 rec_cnt, handle_get_filename (fh));
811 tab_title (t, 1, ngettext ("Reading %d record from the command file.",
812 "Reading %d records from the command file.",
818 /* Free-format parsing. */
820 /* Parses variable specifications for DATA LIST FREE and adds
821 them to the linked list with head FIRST and tail LAST.
822 Returns nonzero only if successful. */
824 parse_free (struct dls_var_spec **first, struct dls_var_spec **last)
829 struct fmt_spec input, output;
835 if (!parse_DATA_LIST_vars (&name, &name_cnt, PV_NONE))
840 if (!parse_format_specifier (&input, 0)
841 || !check_input_specifier (&input, 1)
842 || !lex_force_match (')'))
844 for (i = 0; i < name_cnt; i++)
849 convert_fmt_ItoO (&input, &output);
854 input = make_input_format (FMT_F, 8, 0);
855 output = get_format ();
858 if (input.type == FMT_A || input.type == FMT_AHEX)
862 for (i = 0; i < name_cnt; i++)
864 struct dls_var_spec *spec;
867 v = dict_create_var (default_dict, name[i], width);
871 msg (SE, _("%s is a duplicate variable name."), name[i]);
874 v->print = v->write = output;
876 if (!case_source_is_complex (vfm_source))
879 spec = xmalloc (sizeof *spec);
883 str_copy_trunc (spec->name, sizeof spec->name, v->name);
884 append_var_spec (first, last, spec);
886 for (i = 0; i < name_cnt; i++)
891 return lex_end_of_command () == CMD_SUCCESS;
894 /* Displays a table giving information on free-format variable parsing
897 dump_free_table (const struct data_list_pgm *dls,
898 const struct file_handle *fh)
904 struct dls_var_spec *spec;
905 for (i = 0, spec = dls->first; spec; spec = spec->next)
909 t = tab_create (2, i + 1, 0);
910 tab_columns (t, TAB_COL_DOWN, 1);
911 tab_headers (t, 0, 0, 1, 0);
912 tab_text (t, 0, 0, TAB_CENTER | TAT_TITLE, _("Variable"));
913 tab_text (t, 1, 0, TAB_CENTER | TAT_TITLE, _("Format"));
914 tab_box (t, TAL_1, TAL_1, TAL_0, TAL_1, 0, 0, 1, i);
915 tab_hline (t, TAL_2, 0, 1, 1);
916 tab_dim (t, tab_natural_dimensions);
919 struct dls_var_spec *spec;
921 for (i = 1, spec = dls->first; spec; spec = spec->next, i++)
923 tab_text (t, 0, i, TAB_LEFT, spec->v->name);
924 tab_text (t, 1, i, TAB_LEFT | TAT_FIX, fmt_to_string (&spec->input));
929 tab_title (t, 1, _("Reading free-form data from file %s."),
930 handle_get_filename (fh));
932 tab_title (t, 1, _("Reading free-form data from the command file."));
937 /* Input procedure. */
939 /* Extracts a field from the current position in the current
940 record. Fields can be unquoted or quoted with single- or
941 double-quote characters. *FIELD is set to the field content.
942 After parsing the field, sets the current position in the
943 record to just past the field and any trailing delimiter.
944 END_BLANK is used internally; it should be initialized by the
945 caller to 0 and left alone afterward. Returns 0 on failure or
946 a 1-based column number indicating the beginning of the field
949 cut_field (const struct data_list_pgm *dls, struct fixed_string *field,
952 struct fixed_string line;
956 if (dfm_eof (dls->reader))
958 if (dls->delim_cnt == 0)
959 dfm_expand_tabs (dls->reader);
960 dfm_get_record (dls->reader, &line);
962 cp = ls_c_str (&line);
963 if (dls->delim_cnt == 0)
965 /* Skip leading whitespace. */
966 while (cp < ls_end (&line) && isspace ((unsigned char) *cp))
968 if (cp >= ls_end (&line))
971 /* Handle actual data, whether quoted or unquoted. */
972 if (*cp == '\'' || *cp == '"')
976 field->string = ++cp;
977 while (cp < ls_end (&line) && *cp != quote)
979 field->length = cp - field->string;
980 if (cp < ls_end (&line))
983 msg (SW, _("Quoted string missing terminating `%c'."), quote);
988 while (cp < ls_end (&line)
989 && !isspace ((unsigned char) *cp) && *cp != ',')
991 field->length = cp - field->string;
994 /* Skip trailing whitespace and a single comma if present. */
995 while (cp < ls_end (&line) && isspace ((unsigned char) *cp))
997 if (cp < ls_end (&line) && *cp == ',')
1002 if (cp >= ls_end (&line))
1004 int column = dfm_column_start (dls->reader);
1005 /* A blank line or a line that ends in \t has a
1006 trailing blank field. */
1007 if (column == 1 || (column > 1 && cp[-1] == '\t'))
1009 if (*end_blank == 0)
1012 field->string = ls_end (&line);
1014 dfm_forward_record (dls->reader);
1029 while (cp < ls_end (&line)
1030 && memchr (dls->delims, *cp, dls->delim_cnt) == NULL)
1032 field->length = cp - field->string;
1033 if (cp < ls_end (&line))
1038 dfm_forward_columns (dls->reader, field->string - line.string);
1039 column_start = dfm_column_start (dls->reader);
1041 dfm_forward_columns (dls->reader, cp - field->string);
1043 return column_start;
1046 typedef int data_list_read_func (const struct data_list_pgm *, struct ccase *);
1047 static data_list_read_func read_from_data_list_fixed;
1048 static data_list_read_func read_from_data_list_free;
1049 static data_list_read_func read_from_data_list_list;
1051 /* Returns the proper function to read the kind of DATA LIST
1052 data specified by DLS. */
1053 static data_list_read_func *
1054 get_data_list_read_func (const struct data_list_pgm *dls)
1059 return read_from_data_list_fixed;
1062 return read_from_data_list_free;
1065 return read_from_data_list_list;
1073 /* Reads a case from the data file into C, parsing it according
1074 to fixed-format syntax rules in DLS. Returns -1 on success,
1075 -2 at end of file. */
1077 read_from_data_list_fixed (const struct data_list_pgm *dls,
1080 struct dls_var_spec *var_spec = dls->first;
1083 if (dfm_eof (dls->reader))
1085 for (i = 1; i <= dls->rec_cnt; i++)
1087 struct fixed_string line;
1089 if (dfm_eof (dls->reader))
1091 /* Note that this can't occur on the first record. */
1092 msg (SW, _("Partial case of %d of %d records discarded."),
1093 i - 1, dls->rec_cnt);
1096 dfm_expand_tabs (dls->reader);
1097 dfm_get_record (dls->reader, &line);
1099 for (; var_spec && i == var_spec->rec; var_spec = var_spec->next)
1103 data_in_finite_line (&di, ls_c_str (&line), ls_length (&line),
1104 var_spec->fc, var_spec->lc);
1105 di.v = case_data_rw (c, var_spec->fv);
1106 di.flags = DI_IMPLIED_DECIMALS;
1107 di.f1 = var_spec->fc;
1108 di.format = var_spec->input;
1113 dfm_forward_record (dls->reader);
1119 /* Reads a case from the data file into C, parsing it according
1120 to free-format syntax rules in DLS. Returns -1 on success,
1121 -2 at end of file. */
1123 read_from_data_list_free (const struct data_list_pgm *dls,
1126 struct dls_var_spec *var_spec;
1129 for (var_spec = dls->first; var_spec; var_spec = var_spec->next)
1131 struct fixed_string field;
1134 /* Cut out a field and read in a new record if necessary. */
1137 column = cut_field (dls, &field, &end_blank);
1141 if (!dfm_eof (dls->reader))
1142 dfm_forward_record (dls->reader);
1143 if (dfm_eof (dls->reader))
1145 if (var_spec != dls->first)
1146 msg (SW, _("Partial case discarded. The first variable "
1147 "missing was %s."), var_spec->name);
1155 di.s = ls_c_str (&field);
1156 di.e = ls_end (&field);
1157 di.v = case_data_rw (c, var_spec->fv);
1160 di.format = var_spec->input;
1167 /* Reads a case from the data file and parses it according to
1168 list-format syntax rules. Returns -1 on success, -2 at end of
1171 read_from_data_list_list (const struct data_list_pgm *dls,
1174 struct dls_var_spec *var_spec;
1177 if (dfm_eof (dls->reader))
1180 for (var_spec = dls->first; var_spec; var_spec = var_spec->next)
1182 struct fixed_string field;
1185 /* Cut out a field and check for end-of-line. */
1186 column = cut_field (dls, &field, &end_blank);
1189 if (get_undefined ())
1190 msg (SW, _("Missing value(s) for all variables from %s onward. "
1191 "These will be filled with the system-missing value "
1192 "or blanks, as appropriate."),
1194 for (; var_spec; var_spec = var_spec->next)
1196 int width = get_format_var_width (&var_spec->input);
1198 case_data_rw (c, var_spec->fv)->f = SYSMIS;
1200 memset (case_data_rw (c, var_spec->fv)->s, ' ', width);
1208 di.s = ls_c_str (&field);
1209 di.e = ls_end (&field);
1210 di.v = case_data_rw (c, var_spec->fv);
1213 di.format = var_spec->input;
1218 dfm_forward_record (dls->reader);
1222 /* Destroys SPEC. */
1224 destroy_dls_var_spec (struct dls_var_spec *spec)
1226 struct dls_var_spec *next;
1228 while (spec != NULL)
1236 /* Destroys DATA LIST transformation PGM. */
1238 data_list_trns_free (struct trns_header *pgm)
1240 struct data_list_pgm *dls = (struct data_list_pgm *) pgm;
1242 destroy_dls_var_spec (dls->first);
1243 dfm_close_reader (dls->reader);
1246 /* Handle DATA LIST transformation T, parsing data into C. */
1248 data_list_trns_proc (struct trns_header *t, struct ccase *c,
1249 int case_num UNUSED)
1251 struct data_list_pgm *dls = (struct data_list_pgm *) t;
1252 data_list_read_func *read_func;
1255 dfm_push (dls->reader);
1257 read_func = get_data_list_read_func (dls);
1258 retval = read_func (dls, c);
1260 /* Handle end of file. */
1263 /* If we already encountered end of file then this is an
1267 msg (SE, _("Attempt to read past end of file."));
1269 dfm_pop (dls->reader);
1273 /* Otherwise simply note it. */
1279 /* If there was an END subcommand handle it. */
1280 if (dls->end != NULL)
1284 case_data_rw (c, dls->end->fv)->f = 1.0;
1288 case_data_rw (c, dls->end->fv)->f = 0.0;
1291 dfm_pop (dls->reader);
1296 /* Reads all the records from the data file and passes them to
1299 data_list_source_read (struct case_source *source,
1301 write_case_func *write_case, write_case_data wc_data)
1303 struct data_list_pgm *dls = source->aux;
1304 data_list_read_func *read_func = get_data_list_read_func (dls);
1306 dfm_push (dls->reader);
1307 while (read_func (dls, c) != -2)
1308 if (!write_case (wc_data))
1310 dfm_pop (dls->reader);
1313 /* Destroys the source's internal data. */
1315 data_list_source_destroy (struct case_source *source)
1317 data_list_trns_free (source->aux);
1321 const struct case_source_class data_list_source_class =
1325 data_list_source_read,
1326 data_list_source_destroy,
1329 /* REPEATING DATA. */
1331 /* Represents a number or a variable. */
1332 struct rpd_num_or_var
1334 int num; /* Value, or 0. */
1335 struct variable *var; /* Variable, if number==0. */
1338 /* REPEATING DATA private data structure. */
1339 struct repeating_data_trns
1341 struct trns_header h;
1342 struct dls_var_spec *first, *last; /* Variable parsing specifications. */
1343 struct dfm_reader *reader; /* Input file, never NULL. */
1345 struct rpd_num_or_var starts_beg; /* STARTS=, before the dash. */
1346 struct rpd_num_or_var starts_end; /* STARTS=, after the dash. */
1347 struct rpd_num_or_var occurs; /* OCCURS= subcommand. */
1348 struct rpd_num_or_var length; /* LENGTH= subcommand. */
1349 struct rpd_num_or_var cont_beg; /* CONTINUED=, before the dash. */
1350 struct rpd_num_or_var cont_end; /* CONTINUED=, after the dash. */
1352 /* ID subcommand. */
1353 int id_beg, id_end; /* Beginning & end columns. */
1354 struct variable *id_var; /* DATA LIST variable. */
1355 struct fmt_spec id_spec; /* Input format spec. */
1356 union value *id_value; /* ID value. */
1358 write_case_func *write_case;
1359 write_case_data wc_data;
1362 static trns_free_func repeating_data_trns_free;
1363 static int parse_num_or_var (struct rpd_num_or_var *, const char *);
1364 static int parse_repeating_data (struct dls_var_spec **,
1365 struct dls_var_spec **);
1366 static void find_variable_input_spec (struct variable *v,
1367 struct fmt_spec *spec);
1369 /* Parses the REPEATING DATA command. */
1371 cmd_repeating_data (void)
1373 struct repeating_data_trns *rpd;
1374 int table = 1; /* Print table? */
1375 bool saw_starts = false; /* Saw STARTS subcommand? */
1376 bool saw_occurs = false; /* Saw OCCURS subcommand? */
1377 bool saw_length = false; /* Saw LENGTH subcommand? */
1378 bool saw_continued = false; /* Saw CONTINUED subcommand? */
1379 bool saw_id = false; /* Saw ID subcommand? */
1380 struct file_handle *const fh = default_handle;
1382 assert (case_source_is_complex (vfm_source));
1384 rpd = xmalloc (sizeof *rpd);
1385 rpd->reader = dfm_open_reader (default_handle);
1386 rpd->first = rpd->last = NULL;
1387 rpd->starts_beg.num = 0;
1388 rpd->starts_beg.var = NULL;
1389 rpd->starts_end = rpd->occurs = rpd->length = rpd->cont_beg
1390 = rpd->cont_end = rpd->starts_beg;
1391 rpd->id_beg = rpd->id_end = 0;
1393 rpd->id_value = NULL;
1399 if (lex_match_id ("FILE"))
1401 struct file_handle *file;
1408 msg (SE, _("REPEATING DATA must use the same file as its "
1409 "corresponding DATA LIST or FILE TYPE."));
1413 else if (lex_match_id ("STARTS"))
1418 msg (SE, _("%s subcommand given multiple times."),"STARTS");
1423 if (!parse_num_or_var (&rpd->starts_beg, "STARTS beginning column"))
1426 lex_negative_to_dash ();
1427 if (lex_match ('-'))
1429 if (!parse_num_or_var (&rpd->starts_end, "STARTS ending column"))
1432 /* Otherwise, rpd->starts_end is uninitialized. We
1433 will initialize it later from the record length
1434 of the file. We can't do so now because the
1435 file handle may not be specified yet. */
1438 if (rpd->starts_beg.num != 0 && rpd->starts_end.num != 0
1439 && rpd->starts_beg.num > rpd->starts_end.num)
1441 msg (SE, _("STARTS beginning column (%d) exceeds "
1442 "STARTS ending column (%d)."),
1443 rpd->starts_beg.num, rpd->starts_end.num);
1447 else if (lex_match_id ("OCCURS"))
1452 msg (SE, _("%s subcommand given multiple times."),"OCCURS");
1457 if (!parse_num_or_var (&rpd->occurs, "OCCURS"))
1460 else if (lex_match_id ("LENGTH"))
1465 msg (SE, _("%s subcommand given multiple times."),"LENGTH");
1470 if (!parse_num_or_var (&rpd->length, "LENGTH"))
1473 else if (lex_match_id ("CONTINUED"))
1476 if (saw_continued & 8)
1478 msg (SE, _("%s subcommand given multiple times."),"CONTINUED");
1483 if (!lex_match ('/'))
1485 if (!parse_num_or_var (&rpd->cont_beg,
1486 "CONTINUED beginning column"))
1489 lex_negative_to_dash ();
1491 && !parse_num_or_var (&rpd->cont_end,
1492 "CONTINUED ending column"))
1495 if (rpd->cont_beg.num != 0 && rpd->cont_end.num != 0
1496 && rpd->cont_beg.num > rpd->cont_end.num)
1498 msg (SE, _("CONTINUED beginning column (%d) exceeds "
1499 "CONTINUED ending column (%d)."),
1500 rpd->cont_beg.num, rpd->cont_end.num);
1505 rpd->cont_beg.num = 1;
1507 else if (lex_match_id ("ID"))
1512 msg (SE, _("%s subcommand given multiple times."),"ID");
1517 if (!lex_force_int ())
1519 if (lex_integer () < 1)
1521 msg (SE, _("ID beginning column (%ld) must be positive."),
1525 rpd->id_beg = lex_integer ();
1528 lex_negative_to_dash ();
1530 if (lex_match ('-'))
1532 if (!lex_force_int ())
1534 if (lex_integer () < 1)
1536 msg (SE, _("ID ending column (%ld) must be positive."),
1540 if (lex_integer () < rpd->id_end)
1542 msg (SE, _("ID ending column (%ld) cannot be less than "
1543 "ID beginning column (%d)."),
1544 lex_integer (), rpd->id_beg);
1548 rpd->id_end = lex_integer ();
1551 else rpd->id_end = rpd->id_beg;
1553 if (!lex_force_match ('='))
1555 rpd->id_var = parse_variable ();
1556 if (rpd->id_var == NULL)
1559 find_variable_input_spec (rpd->id_var, &rpd->id_spec);
1560 rpd->id_value = xnmalloc (rpd->id_var->nv, sizeof *rpd->id_value);
1562 else if (lex_match_id ("TABLE"))
1564 else if (lex_match_id ("NOTABLE"))
1566 else if (lex_match_id ("DATA"))
1574 if (!lex_force_match ('/'))
1578 /* Comes here when DATA specification encountered. */
1579 if (!saw_starts || !saw_occurs)
1582 msg (SE, _("Missing required specification STARTS."));
1584 msg (SE, _("Missing required specification OCCURS."));
1588 /* Enforce ID restriction. */
1589 if (saw_id && !saw_continued)
1591 msg (SE, _("ID specified without CONTINUED."));
1595 /* Calculate and check starts_end, cont_end if necessary. */
1596 if (rpd->starts_end.num == 0 && rpd->starts_end.var == NULL)
1598 rpd->starts_end.num = fh != NULL ? handle_get_record_width (fh) : 80;
1599 if (rpd->starts_beg.num != 0
1600 && rpd->starts_beg.num > rpd->starts_end.num)
1602 msg (SE, _("STARTS beginning column (%d) exceeds "
1603 "default STARTS ending column taken from file's "
1604 "record width (%d)."),
1605 rpd->starts_beg.num, rpd->starts_end.num);
1609 if (rpd->cont_end.num == 0 && rpd->cont_end.var == NULL)
1611 rpd->cont_end.num = fh != NULL ? handle_get_record_width (fh) : 80;
1612 if (rpd->cont_beg.num != 0
1613 && rpd->cont_beg.num > rpd->cont_end.num)
1615 msg (SE, _("CONTINUED beginning column (%d) exceeds "
1616 "default CONTINUED ending column taken from file's "
1617 "record width (%d)."),
1618 rpd->cont_beg.num, rpd->cont_end.num);
1624 if (!parse_repeating_data (&rpd->first, &rpd->last))
1627 /* Calculate length if necessary. */
1630 struct dls_var_spec *iter;
1632 for (iter = rpd->first; iter; iter = iter->next)
1633 if (iter->lc > rpd->length.num)
1634 rpd->length.num = iter->lc;
1635 assert (rpd->length.num != 0);
1639 dump_fixed_table (rpd->first, fh, rpd->last->rec);
1641 rpd->h.proc = repeating_data_trns_proc;
1642 rpd->h.free = repeating_data_trns_free;
1643 add_transformation (&rpd->h);
1645 return lex_end_of_command ();
1648 destroy_dls_var_spec (rpd->first);
1649 free (rpd->id_value);
1653 /* Finds the input format specification for variable V and puts
1654 it in SPEC. Because of the way that DATA LIST is structured,
1655 this is nontrivial. */
1657 find_variable_input_spec (struct variable *v, struct fmt_spec *spec)
1661 for (i = 0; i < n_trns; i++)
1663 struct data_list_pgm *pgm = (struct data_list_pgm *) t_trns[i];
1665 if (pgm->h.proc == data_list_trns_proc)
1667 struct dls_var_spec *iter;
1669 for (iter = pgm->first; iter; iter = iter->next)
1672 *spec = iter->input;
1681 /* Parses a number or a variable name from the syntax file and puts
1682 the results in VALUE. Ensures that the number is at least 1; else
1683 emits an error based on MESSAGE. Returns nonzero only if
1686 parse_num_or_var (struct rpd_num_or_var *value, const char *message)
1691 value->var = parse_variable ();
1692 if (value->var == NULL)
1694 if (value->var->type == ALPHA)
1696 msg (SE, _("String variable not allowed here."));
1700 else if (lex_is_integer ())
1702 value->num = lex_integer ();
1706 msg (SE, _("%s (%d) must be at least 1."), message, value->num);
1712 msg (SE, _("Variable or integer expected for %s."), message);
1718 /* Parses data specifications for repeating data groups, adding
1719 them to the linked list with head FIRST and tail LAST.
1720 Returns nonzero only if successful. */
1722 parse_repeating_data (struct dls_var_spec **first, struct dls_var_spec **last)
1724 struct fixed_parsing_state fx;
1730 while (token != '.')
1732 if (!parse_DATA_LIST_vars (&fx.name, &fx.name_cnt, PV_NONE))
1735 if (lex_is_number ())
1737 if (!fixed_parse_compatible (&fx, first, last))
1740 else if (token == '(')
1742 if (!fixed_parse_fortran (&fx, first, last))
1747 msg (SE, _("SPSS-like or FORTRAN-like format "
1748 "specification expected after variable names."));
1752 for (i = 0; i < fx.name_cnt; i++)
1760 for (i = 0; i < fx.name_cnt; i++)
1766 /* Obtains the real value for rpd_num_or_var N in case C and returns
1767 it. The valid range is nonnegative numbers, but numbers outside
1768 this range can be returned and should be handled by the caller as
1771 realize_value (struct rpd_num_or_var *n, struct ccase *c)
1775 double v = case_num (c, n->var->fv);
1776 return v != SYSMIS && v >= INT_MIN && v <= INT_MAX ? v : -1;
1782 /* Parameter record passed to rpd_parse_record(). */
1783 struct rpd_parse_info
1785 struct repeating_data_trns *trns; /* REPEATING DATA transformation. */
1786 const char *line; /* Line being parsed. */
1787 size_t len; /* Line length. */
1788 int beg, end; /* First and last column of first occurrence. */
1789 int ofs; /* Column offset between repeated occurrences. */
1790 struct ccase *c; /* Case to fill in. */
1791 int verify_id; /* Zero to initialize ID, nonzero to verify it. */
1792 int max_occurs; /* Max number of occurrences to parse. */
1795 /* Parses one record of repeated data and outputs corresponding
1796 cases. Returns number of occurrences parsed up to the
1797 maximum specified in INFO. */
1799 rpd_parse_record (const struct rpd_parse_info *info)
1801 struct repeating_data_trns *t = info->trns;
1802 int cur = info->beg;
1805 /* Handle record ID values. */
1808 union value id_temp[MAX_ELEMS_PER_VALUE];
1810 /* Parse record ID into V. */
1814 data_in_finite_line (&di, info->line, info->len, t->id_beg, t->id_end);
1815 di.v = info->verify_id ? id_temp : t->id_value;
1818 di.format = t->id_spec;
1825 && compare_values (id_temp, t->id_value, t->id_var->width) != 0)
1827 char expected_str [MAX_FORMATTED_LEN + 1];
1828 char actual_str [MAX_FORMATTED_LEN + 1];
1830 data_out (expected_str, &t->id_var->print, t->id_value);
1831 expected_str[t->id_var->print.w] = '\0';
1833 data_out (actual_str, &t->id_var->print, id_temp);
1834 actual_str[t->id_var->print.w] = '\0';
1837 _("Encountered mismatched record ID \"%s\" expecting \"%s\"."),
1838 actual_str, expected_str);
1844 /* Iterate over the set of expected occurrences and record each of
1845 them as a separate case. FIXME: We need to execute any
1846 transformations that follow the current one. */
1850 for (occurrences = 0; occurrences < info->max_occurs; )
1852 if (cur + info->ofs > info->end + 1)
1857 struct dls_var_spec *var_spec = t->first;
1859 for (; var_spec; var_spec = var_spec->next)
1861 int fc = var_spec->fc - 1 + cur;
1862 int lc = var_spec->lc - 1 + cur;
1864 if (fc > info->len && !warned && var_spec->input.type != FMT_A)
1869 _("Variable %s starting in column %d extends "
1870 "beyond physical record length of %d."),
1871 var_spec->v->name, fc, info->len);
1877 data_in_finite_line (&di, info->line, info->len, fc, lc);
1878 di.v = case_data_rw (info->c, var_spec->fv);
1881 di.format = var_spec->input;
1891 if (!t->write_case (t->wc_data))
1899 /* Reads one set of repetitions of the elements in the REPEATING
1900 DATA structure. Returns -1 on success, -2 on end of file or
1903 repeating_data_trns_proc (struct trns_header *trns, struct ccase *c,
1904 int case_num UNUSED)
1906 struct repeating_data_trns *t = (struct repeating_data_trns *) trns;
1908 struct fixed_string line; /* Current record. */
1910 int starts_beg; /* Starting column. */
1911 int starts_end; /* Ending column. */
1912 int occurs; /* Number of repetitions. */
1913 int length; /* Length of each occurrence. */
1914 int cont_beg; /* Starting column for continuation lines. */
1915 int cont_end; /* Ending column for continuation lines. */
1917 int occurs_left; /* Number of occurrences remaining. */
1919 int code; /* Return value from rpd_parse_record(). */
1921 int skip_first_record = 0;
1923 dfm_push (t->reader);
1925 /* Read the current record. */
1926 dfm_reread_record (t->reader, 1);
1927 dfm_expand_tabs (t->reader);
1928 if (dfm_eof (t->reader))
1930 dfm_get_record (t->reader, &line);
1931 dfm_forward_record (t->reader);
1933 /* Calculate occurs, length. */
1934 occurs_left = occurs = realize_value (&t->occurs, c);
1937 tmsg (SE, RPD_ERR, _("Invalid value %d for OCCURS."), occurs);
1940 starts_beg = realize_value (&t->starts_beg, c);
1941 if (starts_beg <= 0)
1943 tmsg (SE, RPD_ERR, _("Beginning column for STARTS (%d) must be "
1948 starts_end = realize_value (&t->starts_end, c);
1949 if (starts_end < starts_beg)
1951 tmsg (SE, RPD_ERR, _("Ending column for STARTS (%d) is less than "
1952 "beginning column (%d)."),
1953 starts_end, starts_beg);
1954 skip_first_record = 1;
1956 length = realize_value (&t->length, c);
1959 tmsg (SE, RPD_ERR, _("Invalid value %d for LENGTH."), length);
1961 occurs = occurs_left = 1;
1963 cont_beg = realize_value (&t->cont_beg, c);
1966 tmsg (SE, RPD_ERR, _("Beginning column for CONTINUED (%d) must be "
1971 cont_end = realize_value (&t->cont_end, c);
1972 if (cont_end < cont_beg)
1974 tmsg (SE, RPD_ERR, _("Ending column for CONTINUED (%d) is less than "
1975 "beginning column (%d)."),
1976 cont_end, cont_beg);
1980 /* Parse the first record. */
1981 if (!skip_first_record)
1983 struct rpd_parse_info info;
1985 info.line = ls_c_str (&line);
1986 info.len = ls_length (&line);
1987 info.beg = starts_beg;
1988 info.end = starts_end;
1992 info.max_occurs = occurs_left;
1993 code = rpd_parse_record (&info);
1996 occurs_left -= code;
1998 else if (cont_beg == 0)
2001 /* Make sure, if some occurrences are left, that we have
2002 continuation records. */
2003 if (occurs_left > 0 && cont_beg == 0)
2006 _("Number of repetitions specified on OCCURS (%d) "
2007 "exceed number of repetitions available in "
2008 "space on STARTS (%d), and CONTINUED not specified."),
2009 occurs, (starts_end - starts_beg + 1) / length);
2013 /* Go on to additional records. */
2014 while (occurs_left != 0)
2016 struct rpd_parse_info info;
2018 assert (occurs_left >= 0);
2020 /* Read in another record. */
2021 if (dfm_eof (t->reader))
2024 _("Unexpected end of file with %d repetitions "
2025 "remaining out of %d."),
2026 occurs_left, occurs);
2029 dfm_expand_tabs (t->reader);
2030 dfm_get_record (t->reader, &line);
2031 dfm_forward_record (t->reader);
2033 /* Parse this record. */
2035 info.line = ls_c_str (&line);
2036 info.len = ls_length (&line);
2037 info.beg = cont_beg;
2038 info.end = cont_end;
2042 info.max_occurs = occurs_left;
2043 code = rpd_parse_record (&info);;
2046 occurs_left -= code;
2049 dfm_pop (t->reader);
2051 /* FIXME: This is a kluge until we've implemented multiplexing of
2056 /* Frees a REPEATING DATA transformation. */
2058 repeating_data_trns_free (struct trns_header *rpd_)
2060 struct repeating_data_trns *rpd = (struct repeating_data_trns *) rpd_;
2062 destroy_dls_var_spec (rpd->first);
2063 dfm_close_reader (rpd->reader);
2064 free (rpd->id_value);
2067 /* Lets repeating_data_trns_proc() know how to write the cases
2068 that it composes. Not elegant. */
2070 repeating_data_set_write_case (struct trns_header *trns,
2071 write_case_func *write_case,
2072 write_case_data wc_data)
2074 struct repeating_data_trns *t = (struct repeating_data_trns *) trns;
2076 assert (trns->proc == repeating_data_trns_proc);
2077 t->write_case = write_case;
2078 t->wc_data = wc_data;