1 /* PSPP - computes sample statistics.
2 Copyright (C) 1997-9, 2000, 2006 Free Software Foundation, Inc.
3 Written by Ben Pfaff <blp@gnu.org>.
5 This program is free software; you can redistribute it and/or
6 modify it under the terms of the GNU General Public License as
7 published by the Free Software Foundation; either version 2 of the
8 License, or (at your option) any later version.
10 This program is distributed in the hope that it will be useful, but
11 WITHOUT ANY WARRANTY; without even the implied warranty of
12 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
13 General Public License for more details.
15 You should have received a copy of the GNU General Public License
16 along with this program; if not, write to the Free Software
17 Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
21 #include <language/data-io/data-list.h>
22 #include <libpspp/message.h>
27 #include <libpspp/alloc.h>
28 #include <data/case.h>
29 #include <language/command.h>
30 #include <libpspp/compiler.h>
31 #include <data/data-in.h>
32 #include <libpspp/debug-print.h>
33 #include <language/data-io/data-reader.h>
34 #include <data/dictionary.h>
35 #include <libpspp/message.h>
36 #include <language/data-io/file-handle.h>
37 #include <data/format.h>
38 #include <language/lexer/lexer.h>
39 #include <libpspp/misc.h>
40 #include <data/settings.h>
41 #include <libpspp/str.h>
42 #include <output/table.h>
43 #include <data/variable.h>
44 #include <procedure.h>
47 #define _(msgid) gettext (msgid)
49 /* Utility function. */
51 /* FIXME: Either REPEATING DATA must be the last transformation, or we
52 must multiplex the transformations that follow (i.e., perform them
53 for every case that we produce from a repetition instance).
54 Currently we do neither. We should do one or the other. */
56 /* Describes how to parse one variable. */
59 struct dls_var_spec *next; /* Next specification in list. */
61 /* Both free and fixed formats. */
62 struct fmt_spec input; /* Input format of this field. */
63 struct variable *v; /* Associated variable. Used only in
64 parsing. Not safe later. */
65 int fv; /* First value in case. */
67 /* Fixed format only. */
68 int rec; /* Record number (1-based). */
69 int fc, lc; /* Column numbers in record. */
71 /* Free format only. */
72 char name[LONG_NAME_LEN + 1]; /* Name of variable. */
75 /* Constants for DATA LIST type. */
76 /* Must match table in cmd_data_list(). */
84 /* DATA LIST private data structure. */
87 struct dls_var_spec *first, *last; /* Variable parsing specifications. */
88 struct dfm_reader *reader; /* Data file reader. */
90 int type; /* A DLS_* constant. */
91 struct variable *end; /* Variable specified on END subcommand. */
92 int rec_cnt; /* Number of records. */
93 size_t case_size; /* Case size in bytes. */
94 char *delims; /* Delimiters if any; not null-terminated. */
95 size_t delim_cnt; /* Number of delimiter, or 0 for spaces. */
98 static const struct case_source_class data_list_source_class;
100 static int parse_fixed (struct data_list_pgm *);
101 static int parse_free (struct dls_var_spec **, struct dls_var_spec **);
102 static void dump_fixed_table (const struct dls_var_spec *,
103 const struct file_handle *, int rec_cnt);
104 static void dump_free_table (const struct data_list_pgm *,
105 const struct file_handle *);
106 static void destroy_dls_var_spec (struct dls_var_spec *);
107 static trns_free_func data_list_trns_free;
108 static trns_proc_func data_list_trns_proc;
110 /* Message title for REPEATING DATA. */
111 #define RPD_ERR "REPEATING DATA: "
116 struct data_list_pgm *dls;
117 int table = -1; /* Print table if nonzero, -1=undecided. */
118 struct file_handle *fh = fh_inline_file ();
120 if (!case_source_is_complex (vfm_source))
121 discard_variables ();
123 dls = xmalloc (sizeof *dls);
130 dls->first = dls->last = NULL;
134 if (lex_match_id ("FILE"))
137 fh = fh_parse (FH_REF_FILE | FH_REF_INLINE);
140 if (case_source_is_class (vfm_source, &file_type_source_class)
141 && fh != fh_get_default_handle ())
143 msg (SE, _("DATA LIST must use the same file "
144 "as the enclosing FILE TYPE."));
148 else if (lex_match_id ("RECORDS"))
152 if (!lex_force_int ())
154 dls->rec_cnt = lex_integer ();
158 else if (lex_match_id ("END"))
162 msg (SE, _("The END subcommand may only be specified once."));
167 if (!lex_force_id ())
169 dls->end = dict_lookup_var (default_dict, tokid);
171 dls->end = dict_create_var_assert (default_dict, tokid, 0);
174 else if (token == T_ID)
176 if (lex_match_id ("NOTABLE"))
178 else if (lex_match_id ("TABLE"))
183 if (lex_match_id ("FIXED"))
185 else if (lex_match_id ("FREE"))
187 else if (lex_match_id ("LIST"))
197 msg (SE, _("Only one of FIXED, FREE, or LIST may "
203 if ((dls->type == DLS_FREE || dls->type == DLS_LIST)
206 while (!lex_match (')'))
210 if (lex_match_id ("TAB"))
212 else if (token == T_STRING && tokstr.length == 1)
214 delim = tokstr.string[0];
223 dls->delims = xrealloc (dls->delims, dls->delim_cnt + 1);
224 dls->delims[dls->delim_cnt++] = delim;
238 dls->case_size = dict_get_case_size (default_dict);
239 fh_set_default_handle (fh);
242 dls->type = DLS_FIXED;
246 if (dls->type == DLS_FREE)
252 if (dls->type == DLS_FIXED)
254 if (!parse_fixed (dls))
257 dump_fixed_table (dls->first, fh, dls->rec_cnt);
261 if (!parse_free (&dls->first, &dls->last))
264 dump_free_table (dls, fh);
267 dls->reader = dfm_open_reader (fh);
268 if (dls->reader == NULL)
271 if (vfm_source != NULL)
272 add_transformation (data_list_trns_proc, data_list_trns_free, dls);
274 vfm_source = create_case_source (&data_list_source_class, dls);
279 data_list_trns_free (dls);
280 return CMD_CASCADING_FAILURE;
283 /* Adds SPEC to the linked list with head at FIRST and tail at
286 append_var_spec (struct dls_var_spec **first, struct dls_var_spec **last,
287 struct dls_var_spec *spec)
294 (*last)->next = spec;
298 /* Fixed-format parsing. */
300 /* Used for chaining together fortran-like format specifiers. */
303 struct fmt_list *next;
306 struct fmt_list *down;
309 /* State of parsing DATA LIST. */
310 struct fixed_parsing_state
312 char **name; /* Variable names. */
313 size_t name_cnt; /* Number of names. */
315 int recno; /* Index of current record. */
316 int sc; /* 1-based column number of starting column for
317 next field to output. */
320 static int fixed_parse_compatible (struct fixed_parsing_state *,
321 struct dls_var_spec **,
322 struct dls_var_spec **);
323 static int fixed_parse_fortran (struct fixed_parsing_state *,
324 struct dls_var_spec **,
325 struct dls_var_spec **);
327 /* Parses all the variable specifications for DATA LIST FIXED,
328 storing them into DLS. Returns nonzero if successful. */
330 parse_fixed (struct data_list_pgm *dls)
332 struct fixed_parsing_state fx;
340 while (lex_match ('/'))
343 if (lex_is_integer ())
345 if (lex_integer () < fx.recno)
347 msg (SE, _("The record number specified, %ld, is "
348 "before the previous record, %d. Data "
349 "fields must be listed in order of "
350 "increasing record number."),
351 lex_integer (), fx.recno - 1);
355 fx.recno = lex_integer ();
361 if (!parse_DATA_LIST_vars (&fx.name, &fx.name_cnt, PV_NONE))
364 if (lex_is_number ())
366 if (!fixed_parse_compatible (&fx, &dls->first, &dls->last))
369 else if (token == '(')
371 if (!fixed_parse_fortran (&fx, &dls->first, &dls->last))
376 msg (SE, _("SPSS-like or FORTRAN-like format "
377 "specification expected after variable names."));
381 for (i = 0; i < fx.name_cnt; i++)
385 if (dls->first == NULL)
387 msg (SE, _("At least one variable must be specified."));
390 if (dls->rec_cnt && dls->last->rec > dls->rec_cnt)
392 msg (SE, _("Variables are specified on records that "
393 "should not exist according to RECORDS subcommand."));
396 else if (!dls->rec_cnt)
397 dls->rec_cnt = dls->last->rec;
398 return lex_end_of_command () == CMD_SUCCESS;
401 for (i = 0; i < fx.name_cnt; i++)
407 /* Parses a variable specification in the form 1-10 (A) based on
408 FX and adds specifications to the linked list with head at
409 FIRST and tail at LAST. */
411 fixed_parse_compatible (struct fixed_parsing_state *fx,
412 struct dls_var_spec **first, struct dls_var_spec **last)
414 struct fmt_spec input;
420 if (!lex_force_int ())
425 msg (SE, _("Column positions for fields must be positive."));
431 lex_negative_to_dash ();
434 if (!lex_force_int ())
439 msg (SE, _("Column positions for fields must be positive."));
444 msg (SE, _("The ending column for a field must be "
445 "greater than the starting column."));
454 /* Divide columns evenly. */
455 input.w = (lc - fc + 1) / fx->name_cnt;
456 if ((lc - fc + 1) % fx->name_cnt)
458 msg (SE, _("The %d columns %d-%d "
459 "can't be evenly divided into %d fields."),
460 lc - fc + 1, fc, lc, fx->name_cnt);
464 /* Format specifier. */
467 struct fmt_desc *fdp;
473 input.type = parse_format_specifier_name (&cp, 0);
474 if (input.type == -1)
478 msg (SE, _("A format specifier on this line "
479 "has extra characters on the end."));
489 if (lex_is_integer ())
491 if (lex_integer () < 1)
493 msg (SE, _("The value for number of decimal places "
494 "must be at least 1."));
498 input.d = lex_integer ();
504 fdp = &formats[input.type];
505 if (fdp->n_args < 2 && input.d)
507 msg (SE, _("Input format %s doesn't accept decimal places."),
515 if (!lex_force_match (')'))
523 if (!check_input_specifier (&input, 1))
526 /* Start column for next specification. */
529 /* Width of variables to create. */
530 if (input.type == FMT_A || input.type == FMT_AHEX)
535 /* Create variables and var specs. */
536 for (i = 0; i < fx->name_cnt; i++)
538 struct dls_var_spec *spec;
541 v = dict_create_var (default_dict, fx->name[i], width);
544 convert_fmt_ItoO (&input, &v->print);
546 if (!case_source_is_complex (vfm_source))
551 v = dict_lookup_var_assert (default_dict, fx->name[i]);
552 if (vfm_source == NULL)
554 msg (SE, _("%s is a duplicate variable name."), fx->name[i]);
557 if ((width != 0) != (v->width != 0))
559 msg (SE, _("There is already a variable %s of a "
564 if (width != 0 && width != v->width)
566 msg (SE, _("There is already a string variable %s of a "
567 "different width."), fx->name[i]);
572 spec = xmalloc (sizeof *spec);
576 spec->rec = fx->recno;
577 spec->fc = fc + input.w * i;
578 spec->lc = spec->fc + input.w - 1;
579 append_var_spec (first, last, spec);
584 /* Destroy format list F and, if RECURSE is nonzero, all its
587 destroy_fmt_list (struct fmt_list *f, int recurse)
589 struct fmt_list *next;
594 if (recurse && f->f.type == FMT_DESCEND)
595 destroy_fmt_list (f->down, 1);
600 /* Takes a hierarchically structured fmt_list F as constructed by
601 fixed_parse_fortran(), and flattens it, adding the variable
602 specifications to the linked list with head FIRST and tail
603 LAST. NAME_IDX is used to take values from the list of names
604 in FX; it should initially point to a value of 0. */
606 dump_fmt_list (struct fixed_parsing_state *fx, struct fmt_list *f,
607 struct dls_var_spec **first, struct dls_var_spec **last,
612 for (; f; f = f->next)
613 if (f->f.type == FMT_X)
615 else if (f->f.type == FMT_T)
617 else if (f->f.type == FMT_NEWREC)
619 fx->recno += f->count;
623 for (i = 0; i < f->count; i++)
624 if (f->f.type == FMT_DESCEND)
626 if (!dump_fmt_list (fx, f->down, first, last, name_idx))
631 struct dls_var_spec *spec;
635 if (formats[f->f.type].cat & FCAT_STRING)
639 if (*name_idx >= fx->name_cnt)
641 msg (SE, _("The number of format "
642 "specifications exceeds the given number of "
647 v = dict_create_var (default_dict, fx->name[(*name_idx)++], width);
650 msg (SE, _("%s is a duplicate variable name."), fx->name[i]);
654 if (!case_source_is_complex (vfm_source))
657 spec = xmalloc (sizeof *spec);
661 spec->rec = fx->recno;
663 spec->lc = fx->sc + f->f.w - 1;
664 append_var_spec (first, last, spec);
666 convert_fmt_ItoO (&spec->input, &v->print);
674 /* Recursively parses a FORTRAN-like format specification into
675 the linked list with head FIRST and tail TAIL. LEVEL is the
676 level of recursion, starting from 0. Returns the parsed
677 specification if successful, or a null pointer on failure. */
678 static struct fmt_list *
679 fixed_parse_fortran_internal (struct fixed_parsing_state *fx,
680 struct dls_var_spec **first,
681 struct dls_var_spec **last)
683 struct fmt_list *head = NULL;
684 struct fmt_list *tail = NULL;
686 lex_force_match ('(');
690 struct fmt_list *new = xmalloc (sizeof *new);
693 /* Append new to list. */
701 if (lex_is_integer ())
703 new->count = lex_integer ();
709 /* Parse format specifier. */
712 new->f.type = FMT_DESCEND;
713 new->down = fixed_parse_fortran_internal (fx, first, last);
714 if (new->down == NULL)
717 else if (lex_match ('/'))
718 new->f.type = FMT_NEWREC;
719 else if (!parse_format_specifier (&new->f, FMTP_ALLOW_XT)
720 || !check_input_specifier (&new->f, 1))
725 lex_force_match (')');
730 destroy_fmt_list (head, 0);
735 /* Parses a FORTRAN-like format specification into the linked
736 list with head FIRST and tail LAST. Returns nonzero if
739 fixed_parse_fortran (struct fixed_parsing_state *fx,
740 struct dls_var_spec **first, struct dls_var_spec **last)
742 struct fmt_list *list;
745 list = fixed_parse_fortran_internal (fx, first, last);
750 dump_fmt_list (fx, list, first, last, &name_idx);
751 destroy_fmt_list (list, 1);
752 if (name_idx < fx->name_cnt)
754 msg (SE, _("There aren't enough format specifications "
755 "to match the number of variable names given."));
762 /* Displays a table giving information on fixed-format variable
763 parsing on DATA LIST. */
764 /* FIXME: The `Columns' column should be divided into three columns,
765 one for the starting column, one for the dash, one for the ending
766 column; then right-justify the starting column and left-justify the
769 dump_fixed_table (const struct dls_var_spec *specs,
770 const struct file_handle *fh, int rec_cnt)
772 const struct dls_var_spec *spec;
776 for (i = 0, spec = specs; spec; spec = spec->next)
778 t = tab_create (4, i + 1, 0);
779 tab_columns (t, TAB_COL_DOWN, 1);
780 tab_headers (t, 0, 0, 1, 0);
781 tab_text (t, 0, 0, TAB_CENTER | TAT_TITLE, _("Variable"));
782 tab_text (t, 1, 0, TAB_CENTER | TAT_TITLE, _("Record"));
783 tab_text (t, 2, 0, TAB_CENTER | TAT_TITLE, _("Columns"));
784 tab_text (t, 3, 0, TAB_CENTER | TAT_TITLE, _("Format"));
785 tab_box (t, TAL_1, TAL_1, TAL_0, TAL_1, 0, 0, 3, i);
786 tab_hline (t, TAL_2, 0, 3, 1);
787 tab_dim (t, tab_natural_dimensions);
789 for (i = 1, spec = specs; spec; spec = spec->next, i++)
791 tab_text (t, 0, i, TAB_LEFT, spec->v->name);
792 tab_text (t, 1, i, TAT_PRINTF, "%d", spec->rec);
793 tab_text (t, 2, i, TAT_PRINTF, "%3d-%3d",
795 tab_text (t, 3, i, TAB_LEFT | TAB_FIX,
796 fmt_to_string (&spec->input));
799 tab_title (t, ngettext ("Reading %d record from %s.",
800 "Reading %d records from %s.", rec_cnt),
801 rec_cnt, fh_get_name (fh));
805 /* Free-format parsing. */
807 /* Parses variable specifications for DATA LIST FREE and adds
808 them to the linked list with head FIRST and tail LAST.
809 Returns nonzero only if successful. */
811 parse_free (struct dls_var_spec **first, struct dls_var_spec **last)
816 struct fmt_spec input, output;
822 if (!parse_DATA_LIST_vars (&name, &name_cnt, PV_NONE))
827 if (!parse_format_specifier (&input, 0)
828 || !check_input_specifier (&input, 1)
829 || !lex_force_match (')'))
831 for (i = 0; i < name_cnt; i++)
836 convert_fmt_ItoO (&input, &output);
841 input = make_input_format (FMT_F, 8, 0);
842 output = *get_format ();
845 if (input.type == FMT_A || input.type == FMT_AHEX)
849 for (i = 0; i < name_cnt; i++)
851 struct dls_var_spec *spec;
854 v = dict_create_var (default_dict, name[i], width);
858 msg (SE, _("%s is a duplicate variable name."), name[i]);
861 v->print = v->write = output;
863 if (!case_source_is_complex (vfm_source))
866 spec = xmalloc (sizeof *spec);
870 str_copy_trunc (spec->name, sizeof spec->name, v->name);
871 append_var_spec (first, last, spec);
873 for (i = 0; i < name_cnt; i++)
878 return lex_end_of_command () == CMD_SUCCESS;
881 /* Displays a table giving information on free-format variable parsing
884 dump_free_table (const struct data_list_pgm *dls,
885 const struct file_handle *fh)
891 struct dls_var_spec *spec;
892 for (i = 0, spec = dls->first; spec; spec = spec->next)
896 t = tab_create (2, i + 1, 0);
897 tab_columns (t, TAB_COL_DOWN, 1);
898 tab_headers (t, 0, 0, 1, 0);
899 tab_text (t, 0, 0, TAB_CENTER | TAT_TITLE, _("Variable"));
900 tab_text (t, 1, 0, TAB_CENTER | TAT_TITLE, _("Format"));
901 tab_box (t, TAL_1, TAL_1, TAL_0, TAL_1, 0, 0, 1, i);
902 tab_hline (t, TAL_2, 0, 1, 1);
903 tab_dim (t, tab_natural_dimensions);
906 struct dls_var_spec *spec;
908 for (i = 1, spec = dls->first; spec; spec = spec->next, i++)
910 tab_text (t, 0, i, TAB_LEFT, spec->v->name);
911 tab_text (t, 1, i, TAB_LEFT | TAB_FIX, fmt_to_string (&spec->input));
915 tab_title (t, _("Reading free-form data from %s."), fh_get_name (fh));
920 /* Input procedure. */
922 /* Extracts a field from the current position in the current
923 record. Fields can be unquoted or quoted with single- or
924 double-quote characters. *FIELD is set to the field content.
925 After parsing the field, sets the current position in the
926 record to just past the field and any trailing delimiter.
927 END_BLANK is used internally; it should be initialized by the
928 caller to 0 and left alone afterward. Returns 0 on failure or
929 a 1-based column number indicating the beginning of the field
932 cut_field (const struct data_list_pgm *dls, struct fixed_string *field,
935 struct fixed_string line;
939 if (dfm_eof (dls->reader))
941 if (dls->delim_cnt == 0)
942 dfm_expand_tabs (dls->reader);
943 dfm_get_record (dls->reader, &line);
945 cp = ls_c_str (&line);
946 if (dls->delim_cnt == 0)
948 /* Skip leading whitespace. */
949 while (cp < ls_end (&line) && isspace ((unsigned char) *cp))
951 if (cp >= ls_end (&line))
954 /* Handle actual data, whether quoted or unquoted. */
955 if (*cp == '\'' || *cp == '"')
959 field->string = ++cp;
960 while (cp < ls_end (&line) && *cp != quote)
962 field->length = cp - field->string;
963 if (cp < ls_end (&line))
966 msg (SW, _("Quoted string missing terminating `%c'."), quote);
971 while (cp < ls_end (&line)
972 && !isspace ((unsigned char) *cp) && *cp != ',')
974 field->length = cp - field->string;
977 /* Skip trailing whitespace and a single comma if present. */
978 while (cp < ls_end (&line) && isspace ((unsigned char) *cp))
980 if (cp < ls_end (&line) && *cp == ',')
985 if (cp >= ls_end (&line))
987 int column = dfm_column_start (dls->reader);
988 /* A blank line or a line that ends in \t has a
989 trailing blank field. */
990 if (column == 1 || (column > 1 && cp[-1] == '\t'))
995 field->string = ls_end (&line);
997 dfm_forward_record (dls->reader);
1012 while (cp < ls_end (&line)
1013 && memchr (dls->delims, *cp, dls->delim_cnt) == NULL)
1015 field->length = cp - field->string;
1016 if (cp < ls_end (&line))
1021 dfm_forward_columns (dls->reader, field->string - line.string);
1022 column_start = dfm_column_start (dls->reader);
1024 dfm_forward_columns (dls->reader, cp - field->string);
1026 return column_start;
1029 static bool read_from_data_list_fixed (const struct data_list_pgm *,
1031 static bool read_from_data_list_free (const struct data_list_pgm *,
1033 static bool read_from_data_list_list (const struct data_list_pgm *,
1036 /* Reads a case from DLS into C.
1037 Returns true if successful, false at end of file or on I/O error. */
1039 read_from_data_list (const struct data_list_pgm *dls, struct ccase *c)
1043 dfm_push (dls->reader);
1047 retval = read_from_data_list_fixed (dls, c);
1050 retval = read_from_data_list_free (dls, c);
1053 retval = read_from_data_list_list (dls, c);
1058 dfm_pop (dls->reader);
1063 /* Reads a case from the data file into C, parsing it according
1064 to fixed-format syntax rules in DLS.
1065 Returns true if successful, false at end of file or on I/O error. */
1067 read_from_data_list_fixed (const struct data_list_pgm *dls, struct ccase *c)
1069 struct dls_var_spec *var_spec = dls->first;
1072 if (dfm_eof (dls->reader))
1074 for (i = 1; i <= dls->rec_cnt; i++)
1076 struct fixed_string line;
1078 if (dfm_eof (dls->reader))
1080 /* Note that this can't occur on the first record. */
1081 msg (SW, _("Partial case of %d of %d records discarded."),
1082 i - 1, dls->rec_cnt);
1085 dfm_expand_tabs (dls->reader);
1086 dfm_get_record (dls->reader, &line);
1088 for (; var_spec && i == var_spec->rec; var_spec = var_spec->next)
1092 data_in_finite_line (&di, ls_c_str (&line), ls_length (&line),
1093 var_spec->fc, var_spec->lc);
1094 di.v = case_data_rw (c, var_spec->fv);
1095 di.flags = DI_IMPLIED_DECIMALS;
1096 di.f1 = var_spec->fc;
1097 di.format = var_spec->input;
1102 dfm_forward_record (dls->reader);
1108 /* Reads a case from the data file into C, parsing it according
1109 to free-format syntax rules in DLS.
1110 Returns true if successful, false at end of file or on I/O error. */
1112 read_from_data_list_free (const struct data_list_pgm *dls, struct ccase *c)
1114 struct dls_var_spec *var_spec;
1117 for (var_spec = dls->first; var_spec; var_spec = var_spec->next)
1119 struct fixed_string field;
1122 /* Cut out a field and read in a new record if necessary. */
1125 column = cut_field (dls, &field, &end_blank);
1129 if (!dfm_eof (dls->reader))
1130 dfm_forward_record (dls->reader);
1131 if (dfm_eof (dls->reader))
1133 if (var_spec != dls->first)
1134 msg (SW, _("Partial case discarded. The first variable "
1135 "missing was %s."), var_spec->name);
1143 di.s = ls_c_str (&field);
1144 di.e = ls_end (&field);
1145 di.v = case_data_rw (c, var_spec->fv);
1148 di.format = var_spec->input;
1155 /* Reads a case from the data file and parses it according to
1156 list-format syntax rules.
1157 Returns true if successful, false at end of file or on I/O error. */
1159 read_from_data_list_list (const struct data_list_pgm *dls, struct ccase *c)
1161 struct dls_var_spec *var_spec;
1164 if (dfm_eof (dls->reader))
1167 for (var_spec = dls->first; var_spec; var_spec = var_spec->next)
1169 struct fixed_string field;
1172 /* Cut out a field and check for end-of-line. */
1173 column = cut_field (dls, &field, &end_blank);
1176 if (get_undefined ())
1177 msg (SW, _("Missing value(s) for all variables from %s onward. "
1178 "These will be filled with the system-missing value "
1179 "or blanks, as appropriate."),
1181 for (; var_spec; var_spec = var_spec->next)
1183 int width = get_format_var_width (&var_spec->input);
1185 case_data_rw (c, var_spec->fv)->f = SYSMIS;
1187 memset (case_data_rw (c, var_spec->fv)->s, ' ', width);
1195 di.s = ls_c_str (&field);
1196 di.e = ls_end (&field);
1197 di.v = case_data_rw (c, var_spec->fv);
1200 di.format = var_spec->input;
1205 dfm_forward_record (dls->reader);
1209 /* Destroys SPEC. */
1211 destroy_dls_var_spec (struct dls_var_spec *spec)
1213 struct dls_var_spec *next;
1215 while (spec != NULL)
1223 /* Destroys DATA LIST transformation DLS.
1224 Returns true if successful, false if an I/O error occurred. */
1226 data_list_trns_free (void *dls_)
1228 struct data_list_pgm *dls = dls_;
1230 destroy_dls_var_spec (dls->first);
1231 dfm_close_reader (dls->reader);
1236 /* Handle DATA LIST transformation DLS, parsing data into C. */
1238 data_list_trns_proc (void *dls_, struct ccase *c, int case_num UNUSED)
1240 struct data_list_pgm *dls = dls_;
1243 if (read_from_data_list (dls, c))
1244 retval = TRNS_CONTINUE;
1245 else if (dfm_reader_error (dls->reader) || dfm_eof (dls->reader) > 1)
1247 /* An I/O error, or encountering end of file for a second
1248 time, should be escalated into a more serious error. */
1249 retval = TRNS_ERROR;
1252 retval = TRNS_DROP_CASE;
1254 /* If there was an END subcommand handle it. */
1255 if (dls->end != NULL)
1257 double *end = &case_data_rw (c, dls->end->fv)->f;
1258 if (retval == TRNS_DROP_CASE)
1261 retval = TRNS_CONTINUE;
1270 /* Reads all the records from the data file and passes them to
1272 Returns true if successful, false if an I/O error occurred. */
1274 data_list_source_read (struct case_source *source,
1276 write_case_func *write_case, write_case_data wc_data)
1278 struct data_list_pgm *dls = source->aux;
1284 if (!read_from_data_list (dls, c))
1285 return !dfm_reader_error (dls->reader);
1287 dfm_push (dls->reader);
1288 ok = write_case (wc_data);
1289 dfm_pop (dls->reader);
1295 /* Destroys the source's internal data. */
1297 data_list_source_destroy (struct case_source *source)
1299 data_list_trns_free (source->aux);
1302 static const struct case_source_class data_list_source_class =
1306 data_list_source_read,
1307 data_list_source_destroy,
1310 /* REPEATING DATA. */
1312 /* Represents a number or a variable. */
1313 struct rpd_num_or_var
1315 int num; /* Value, or 0. */
1316 struct variable *var; /* Variable, if number==0. */
1319 /* REPEATING DATA private data structure. */
1320 struct repeating_data_trns
1322 struct dls_var_spec *first, *last; /* Variable parsing specifications. */
1323 struct dfm_reader *reader; /* Input file, never NULL. */
1325 struct rpd_num_or_var starts_beg; /* STARTS=, before the dash. */
1326 struct rpd_num_or_var starts_end; /* STARTS=, after the dash. */
1327 struct rpd_num_or_var occurs; /* OCCURS= subcommand. */
1328 struct rpd_num_or_var length; /* LENGTH= subcommand. */
1329 struct rpd_num_or_var cont_beg; /* CONTINUED=, before the dash. */
1330 struct rpd_num_or_var cont_end; /* CONTINUED=, after the dash. */
1332 /* ID subcommand. */
1333 int id_beg, id_end; /* Beginning & end columns. */
1334 struct variable *id_var; /* DATA LIST variable. */
1335 struct fmt_spec id_spec; /* Input format spec. */
1336 union value *id_value; /* ID value. */
1338 write_case_func *write_case;
1339 write_case_data wc_data;
1342 static trns_free_func repeating_data_trns_free;
1343 static int parse_num_or_var (struct rpd_num_or_var *, const char *);
1344 static int parse_repeating_data (struct dls_var_spec **,
1345 struct dls_var_spec **);
1346 static void find_variable_input_spec (struct variable *v,
1347 struct fmt_spec *spec);
1349 /* Parses the REPEATING DATA command. */
1351 cmd_repeating_data (void)
1353 struct repeating_data_trns *rpd;
1354 int table = 1; /* Print table? */
1355 bool saw_starts = false; /* Saw STARTS subcommand? */
1356 bool saw_occurs = false; /* Saw OCCURS subcommand? */
1357 bool saw_length = false; /* Saw LENGTH subcommand? */
1358 bool saw_continued = false; /* Saw CONTINUED subcommand? */
1359 bool saw_id = false; /* Saw ID subcommand? */
1360 struct file_handle *const fh = fh_get_default_handle ();
1362 assert (case_source_is_complex (vfm_source));
1364 rpd = xmalloc (sizeof *rpd);
1365 rpd->reader = dfm_open_reader (fh);
1366 rpd->first = rpd->last = NULL;
1367 rpd->starts_beg.num = 0;
1368 rpd->starts_beg.var = NULL;
1369 rpd->starts_end = rpd->occurs = rpd->length = rpd->cont_beg
1370 = rpd->cont_end = rpd->starts_beg;
1371 rpd->id_beg = rpd->id_end = 0;
1373 rpd->id_value = NULL;
1379 if (lex_match_id ("FILE"))
1381 struct file_handle *file;
1383 file = fh_parse (FH_REF_FILE | FH_REF_INLINE);
1388 msg (SE, _("REPEATING DATA must use the same file as its "
1389 "corresponding DATA LIST or FILE TYPE."));
1393 else if (lex_match_id ("STARTS"))
1398 msg (SE, _("%s subcommand given multiple times."),"STARTS");
1403 if (!parse_num_or_var (&rpd->starts_beg, "STARTS beginning column"))
1406 lex_negative_to_dash ();
1407 if (lex_match ('-'))
1409 if (!parse_num_or_var (&rpd->starts_end, "STARTS ending column"))
1412 /* Otherwise, rpd->starts_end is uninitialized. We
1413 will initialize it later from the record length
1414 of the file. We can't do so now because the
1415 file handle may not be specified yet. */
1418 if (rpd->starts_beg.num != 0 && rpd->starts_end.num != 0
1419 && rpd->starts_beg.num > rpd->starts_end.num)
1421 msg (SE, _("STARTS beginning column (%d) exceeds "
1422 "STARTS ending column (%d)."),
1423 rpd->starts_beg.num, rpd->starts_end.num);
1427 else if (lex_match_id ("OCCURS"))
1432 msg (SE, _("%s subcommand given multiple times."),"OCCURS");
1437 if (!parse_num_or_var (&rpd->occurs, "OCCURS"))
1440 else if (lex_match_id ("LENGTH"))
1445 msg (SE, _("%s subcommand given multiple times."),"LENGTH");
1450 if (!parse_num_or_var (&rpd->length, "LENGTH"))
1453 else if (lex_match_id ("CONTINUED"))
1458 msg (SE, _("%s subcommand given multiple times."),"CONTINUED");
1461 saw_continued = true;
1463 if (!lex_match ('/'))
1465 if (!parse_num_or_var (&rpd->cont_beg,
1466 "CONTINUED beginning column"))
1469 lex_negative_to_dash ();
1471 && !parse_num_or_var (&rpd->cont_end,
1472 "CONTINUED ending column"))
1475 if (rpd->cont_beg.num != 0 && rpd->cont_end.num != 0
1476 && rpd->cont_beg.num > rpd->cont_end.num)
1478 msg (SE, _("CONTINUED beginning column (%d) exceeds "
1479 "CONTINUED ending column (%d)."),
1480 rpd->cont_beg.num, rpd->cont_end.num);
1485 rpd->cont_beg.num = 1;
1487 else if (lex_match_id ("ID"))
1492 msg (SE, _("%s subcommand given multiple times."),"ID");
1497 if (!lex_force_int ())
1499 if (lex_integer () < 1)
1501 msg (SE, _("ID beginning column (%ld) must be positive."),
1505 rpd->id_beg = lex_integer ();
1508 lex_negative_to_dash ();
1510 if (lex_match ('-'))
1512 if (!lex_force_int ())
1514 if (lex_integer () < 1)
1516 msg (SE, _("ID ending column (%ld) must be positive."),
1520 if (lex_integer () < rpd->id_end)
1522 msg (SE, _("ID ending column (%ld) cannot be less than "
1523 "ID beginning column (%d)."),
1524 lex_integer (), rpd->id_beg);
1528 rpd->id_end = lex_integer ();
1531 else rpd->id_end = rpd->id_beg;
1533 if (!lex_force_match ('='))
1535 rpd->id_var = parse_variable ();
1536 if (rpd->id_var == NULL)
1539 find_variable_input_spec (rpd->id_var, &rpd->id_spec);
1540 rpd->id_value = xnmalloc (rpd->id_var->nv, sizeof *rpd->id_value);
1542 else if (lex_match_id ("TABLE"))
1544 else if (lex_match_id ("NOTABLE"))
1546 else if (lex_match_id ("DATA"))
1554 if (!lex_force_match ('/'))
1558 /* Comes here when DATA specification encountered. */
1559 if (!saw_starts || !saw_occurs)
1562 msg (SE, _("Missing required specification STARTS."));
1564 msg (SE, _("Missing required specification OCCURS."));
1568 /* Enforce ID restriction. */
1569 if (saw_id && !saw_continued)
1571 msg (SE, _("ID specified without CONTINUED."));
1575 /* Calculate and check starts_end, cont_end if necessary. */
1576 if (rpd->starts_end.num == 0 && rpd->starts_end.var == NULL)
1578 rpd->starts_end.num = fh_get_record_width (fh);
1579 if (rpd->starts_beg.num != 0
1580 && rpd->starts_beg.num > rpd->starts_end.num)
1582 msg (SE, _("STARTS beginning column (%d) exceeds "
1583 "default STARTS ending column taken from file's "
1584 "record width (%d)."),
1585 rpd->starts_beg.num, rpd->starts_end.num);
1589 if (rpd->cont_end.num == 0 && rpd->cont_end.var == NULL)
1591 rpd->cont_end.num = fh_get_record_width (fh);
1592 if (rpd->cont_beg.num != 0
1593 && rpd->cont_beg.num > rpd->cont_end.num)
1595 msg (SE, _("CONTINUED beginning column (%d) exceeds "
1596 "default CONTINUED ending column taken from file's "
1597 "record width (%d)."),
1598 rpd->cont_beg.num, rpd->cont_end.num);
1604 if (!parse_repeating_data (&rpd->first, &rpd->last))
1607 /* Calculate length if necessary. */
1610 struct dls_var_spec *iter;
1612 for (iter = rpd->first; iter; iter = iter->next)
1613 if (iter->lc > rpd->length.num)
1614 rpd->length.num = iter->lc;
1615 assert (rpd->length.num != 0);
1619 dump_fixed_table (rpd->first, fh, rpd->last->rec);
1621 add_transformation (repeating_data_trns_proc, repeating_data_trns_free, rpd);
1623 return lex_end_of_command ();
1626 repeating_data_trns_free (rpd);
1627 return CMD_CASCADING_FAILURE;
1630 /* Finds the input format specification for variable V and puts
1631 it in SPEC. Because of the way that DATA LIST is structured,
1632 this is nontrivial. */
1634 find_variable_input_spec (struct variable *v, struct fmt_spec *spec)
1638 for (i = 0; i < n_trns; i++)
1640 struct transformation *trns = &t_trns[i];
1642 if (trns->proc == data_list_trns_proc)
1644 struct data_list_pgm *pgm = trns->private;
1645 struct dls_var_spec *iter;
1647 for (iter = pgm->first; iter; iter = iter->next)
1650 *spec = iter->input;
1659 /* Parses a number or a variable name from the syntax file and puts
1660 the results in VALUE. Ensures that the number is at least 1; else
1661 emits an error based on MESSAGE. Returns nonzero only if
1664 parse_num_or_var (struct rpd_num_or_var *value, const char *message)
1669 value->var = parse_variable ();
1670 if (value->var == NULL)
1672 if (value->var->type == ALPHA)
1674 msg (SE, _("String variable not allowed here."));
1678 else if (lex_is_integer ())
1680 value->num = lex_integer ();
1684 msg (SE, _("%s (%d) must be at least 1."), message, value->num);
1690 msg (SE, _("Variable or integer expected for %s."), message);
1696 /* Parses data specifications for repeating data groups, adding
1697 them to the linked list with head FIRST and tail LAST.
1698 Returns nonzero only if successful. */
1700 parse_repeating_data (struct dls_var_spec **first, struct dls_var_spec **last)
1702 struct fixed_parsing_state fx;
1708 while (token != '.')
1710 if (!parse_DATA_LIST_vars (&fx.name, &fx.name_cnt, PV_NONE))
1713 if (lex_is_number ())
1715 if (!fixed_parse_compatible (&fx, first, last))
1718 else if (token == '(')
1720 if (!fixed_parse_fortran (&fx, first, last))
1725 msg (SE, _("SPSS-like or FORTRAN-like format "
1726 "specification expected after variable names."));
1730 for (i = 0; i < fx.name_cnt; i++)
1738 for (i = 0; i < fx.name_cnt; i++)
1744 /* Obtains the real value for rpd_num_or_var N in case C and returns
1745 it. The valid range is nonnegative numbers, but numbers outside
1746 this range can be returned and should be handled by the caller as
1749 realize_value (struct rpd_num_or_var *n, struct ccase *c)
1753 double v = case_num (c, n->var->fv);
1754 return v != SYSMIS && v >= INT_MIN && v <= INT_MAX ? v : -1;
1760 /* Parameter record passed to rpd_parse_record(). */
1761 struct rpd_parse_info
1763 struct repeating_data_trns *trns; /* REPEATING DATA transformation. */
1764 const char *line; /* Line being parsed. */
1765 size_t len; /* Line length. */
1766 int beg, end; /* First and last column of first occurrence. */
1767 int ofs; /* Column offset between repeated occurrences. */
1768 struct ccase *c; /* Case to fill in. */
1769 int verify_id; /* Zero to initialize ID, nonzero to verify it. */
1770 int max_occurs; /* Max number of occurrences to parse. */
1773 /* Parses one record of repeated data and outputs corresponding
1774 cases. Returns number of occurrences parsed up to the
1775 maximum specified in INFO. */
1777 rpd_parse_record (const struct rpd_parse_info *info)
1779 struct repeating_data_trns *t = info->trns;
1780 int cur = info->beg;
1783 /* Handle record ID values. */
1786 union value id_temp[MAX_ELEMS_PER_VALUE];
1788 /* Parse record ID into V. */
1792 data_in_finite_line (&di, info->line, info->len, t->id_beg, t->id_end);
1793 di.v = info->verify_id ? id_temp : t->id_value;
1796 di.format = t->id_spec;
1803 && compare_values (id_temp, t->id_value, t->id_var->width) != 0)
1805 char expected_str [MAX_FORMATTED_LEN + 1];
1806 char actual_str [MAX_FORMATTED_LEN + 1];
1808 data_out (expected_str, &t->id_var->print, t->id_value);
1809 expected_str[t->id_var->print.w] = '\0';
1811 data_out (actual_str, &t->id_var->print, id_temp);
1812 actual_str[t->id_var->print.w] = '\0';
1815 _("Encountered mismatched record ID \"%s\" expecting \"%s\"."),
1816 actual_str, expected_str);
1822 /* Iterate over the set of expected occurrences and record each of
1823 them as a separate case. FIXME: We need to execute any
1824 transformations that follow the current one. */
1828 for (occurrences = 0; occurrences < info->max_occurs; )
1830 if (cur + info->ofs > info->end + 1)
1835 struct dls_var_spec *var_spec = t->first;
1837 for (; var_spec; var_spec = var_spec->next)
1839 int fc = var_spec->fc - 1 + cur;
1840 int lc = var_spec->lc - 1 + cur;
1842 if (fc > info->len && !warned && var_spec->input.type != FMT_A)
1847 _("Variable %s starting in column %d extends "
1848 "beyond physical record length of %d."),
1849 var_spec->v->name, fc, info->len);
1855 data_in_finite_line (&di, info->line, info->len, fc, lc);
1856 di.v = case_data_rw (info->c, var_spec->fv);
1859 di.format = var_spec->input;
1869 if (!t->write_case (t->wc_data))
1877 /* Reads one set of repetitions of the elements in the REPEATING
1878 DATA structure. Returns TRNS_CONTINUE on success,
1879 TRNS_DROP_CASE on end of file or on failure, or TRNS_NEXT_CASE. */
1881 repeating_data_trns_proc (void *trns_, struct ccase *c, int case_num UNUSED)
1883 struct repeating_data_trns *t = trns_;
1885 struct fixed_string line; /* Current record. */
1887 int starts_beg; /* Starting column. */
1888 int starts_end; /* Ending column. */
1889 int occurs; /* Number of repetitions. */
1890 int length; /* Length of each occurrence. */
1891 int cont_beg; /* Starting column for continuation lines. */
1892 int cont_end; /* Ending column for continuation lines. */
1894 int occurs_left; /* Number of occurrences remaining. */
1896 int code; /* Return value from rpd_parse_record(). */
1898 int skip_first_record = 0;
1900 dfm_push (t->reader);
1902 /* Read the current record. */
1903 dfm_reread_record (t->reader, 1);
1904 dfm_expand_tabs (t->reader);
1905 if (dfm_eof (t->reader))
1906 return TRNS_DROP_CASE;
1907 dfm_get_record (t->reader, &line);
1908 dfm_forward_record (t->reader);
1910 /* Calculate occurs, length. */
1911 occurs_left = occurs = realize_value (&t->occurs, c);
1914 tmsg (SE, RPD_ERR, _("Invalid value %d for OCCURS."), occurs);
1915 return TRNS_NEXT_CASE;
1917 starts_beg = realize_value (&t->starts_beg, c);
1918 if (starts_beg <= 0)
1920 tmsg (SE, RPD_ERR, _("Beginning column for STARTS (%d) must be "
1923 return TRNS_NEXT_CASE;
1925 starts_end = realize_value (&t->starts_end, c);
1926 if (starts_end < starts_beg)
1928 tmsg (SE, RPD_ERR, _("Ending column for STARTS (%d) is less than "
1929 "beginning column (%d)."),
1930 starts_end, starts_beg);
1931 skip_first_record = 1;
1933 length = realize_value (&t->length, c);
1936 tmsg (SE, RPD_ERR, _("Invalid value %d for LENGTH."), length);
1938 occurs = occurs_left = 1;
1940 cont_beg = realize_value (&t->cont_beg, c);
1943 tmsg (SE, RPD_ERR, _("Beginning column for CONTINUED (%d) must be "
1946 return TRNS_DROP_CASE;
1948 cont_end = realize_value (&t->cont_end, c);
1949 if (cont_end < cont_beg)
1951 tmsg (SE, RPD_ERR, _("Ending column for CONTINUED (%d) is less than "
1952 "beginning column (%d)."),
1953 cont_end, cont_beg);
1954 return TRNS_DROP_CASE;
1957 /* Parse the first record. */
1958 if (!skip_first_record)
1960 struct rpd_parse_info info;
1962 info.line = ls_c_str (&line);
1963 info.len = ls_length (&line);
1964 info.beg = starts_beg;
1965 info.end = starts_end;
1969 info.max_occurs = occurs_left;
1970 code = rpd_parse_record (&info);
1972 return TRNS_DROP_CASE;
1973 occurs_left -= code;
1975 else if (cont_beg == 0)
1976 return TRNS_NEXT_CASE;
1978 /* Make sure, if some occurrences are left, that we have
1979 continuation records. */
1980 if (occurs_left > 0 && cont_beg == 0)
1983 _("Number of repetitions specified on OCCURS (%d) "
1984 "exceed number of repetitions available in "
1985 "space on STARTS (%d), and CONTINUED not specified."),
1986 occurs, (starts_end - starts_beg + 1) / length);
1987 return TRNS_DROP_CASE;
1990 /* Go on to additional records. */
1991 while (occurs_left != 0)
1993 struct rpd_parse_info info;
1995 assert (occurs_left >= 0);
1997 /* Read in another record. */
1998 if (dfm_eof (t->reader))
2001 _("Unexpected end of file with %d repetitions "
2002 "remaining out of %d."),
2003 occurs_left, occurs);
2004 return TRNS_DROP_CASE;
2006 dfm_expand_tabs (t->reader);
2007 dfm_get_record (t->reader, &line);
2008 dfm_forward_record (t->reader);
2010 /* Parse this record. */
2012 info.line = ls_c_str (&line);
2013 info.len = ls_length (&line);
2014 info.beg = cont_beg;
2015 info.end = cont_end;
2019 info.max_occurs = occurs_left;
2020 code = rpd_parse_record (&info);;
2022 return TRNS_DROP_CASE;
2023 occurs_left -= code;
2026 dfm_pop (t->reader);
2028 /* FIXME: This is a kluge until we've implemented multiplexing of
2030 return TRNS_NEXT_CASE;
2033 /* Frees a REPEATING DATA transformation.
2034 Returns true if successful, false if an I/O error occurred. */
2036 repeating_data_trns_free (void *rpd_)
2038 struct repeating_data_trns *rpd = rpd_;
2040 destroy_dls_var_spec (rpd->first);
2041 dfm_close_reader (rpd->reader);
2042 free (rpd->id_value);
2047 /* Lets repeating_data_trns_proc() know how to write the cases
2048 that it composes. Not elegant. */
2050 repeating_data_set_write_case (struct transformation *trns_,
2051 write_case_func *write_case,
2052 write_case_data wc_data)
2054 struct repeating_data_trns *t = trns_->private;
2056 assert (trns_->proc == repeating_data_trns_proc);
2057 t->write_case = write_case;
2058 t->wc_data = wc_data;