1 /* PSPP - computes sample statistics.
2 Copyright (C) 1997-9, 2000, 2006 Free Software Foundation, Inc.
3 Written by Ben Pfaff <blp@gnu.org>.
5 This program is free software; you can redistribute it and/or
6 modify it under the terms of the GNU General Public License as
7 published by the Free Software Foundation; either version 2 of the
8 License, or (at your option) any later version.
10 This program is distributed in the hope that it will be useful, but
11 WITHOUT ANY WARRANTY; without even the implied warranty of
12 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
13 General Public License for more details.
15 You should have received a copy of the GNU General Public License
16 along with this program; if not, write to the Free Software
17 Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
22 #include "data-list.h"
29 #include <data/case.h>
30 #include <data/data-in.h>
31 #include <data/dictionary.h>
32 #include <data/format.h>
33 #include <data/settings.h>
34 #include <data/variable.h>
35 #include <language/command.h>
36 #include <language/data-io/data-list.h>
37 #include <language/data-io/data-reader.h>
38 #include <language/data-io/file-handle.h>
39 #include <language/data-io/file-type.h>
40 #include <language/data-io/inpt-pgm.h>
41 #include <language/lexer/lexer.h>
42 #include <libpspp/alloc.h>
43 #include <libpspp/compiler.h>
44 #include <libpspp/message.h>
45 #include <libpspp/message.h>
46 #include <libpspp/misc.h>
47 #include <libpspp/str.h>
48 #include <output/table.h>
49 #include <procedure.h>
52 #define _(msgid) gettext (msgid)
54 /* Utility function. */
56 /* FIXME: Either REPEATING DATA must be the last transformation, or we
57 must multiplex the transformations that follow (i.e., perform them
58 for every case that we produce from a repetition instance).
59 Currently we do neither. We should do one or the other. */
61 /* Describes how to parse one variable. */
64 struct dls_var_spec *next; /* Next specification in list. */
66 /* Both free and fixed formats. */
67 struct fmt_spec input; /* Input format of this field. */
68 struct variable *v; /* Associated variable. Used only in
69 parsing. Not safe later. */
70 int fv; /* First value in case. */
72 /* Fixed format only. */
73 int rec; /* Record number (1-based). */
74 int fc, lc; /* Column numbers in record. */
76 /* Free format only. */
77 char name[LONG_NAME_LEN + 1]; /* Name of variable. */
80 /* Constants for DATA LIST type. */
81 /* Must match table in cmd_data_list(). */
89 /* DATA LIST private data structure. */
92 struct dls_var_spec *first, *last; /* Variable parsing specifications. */
93 struct dfm_reader *reader; /* Data file reader. */
95 int type; /* A DLS_* constant. */
96 struct variable *end; /* Variable specified on END subcommand. */
97 int rec_cnt; /* Number of records. */
98 size_t case_size; /* Case size in bytes. */
99 char *delims; /* Delimiters if any; not null-terminated. */
100 size_t delim_cnt; /* Number of delimiter, or 0 for spaces. */
103 static const struct case_source_class data_list_source_class;
105 static void rpd_msg (enum msg_class, const char *format, ...);
106 static int parse_fixed (struct data_list_pgm *);
107 static int parse_free (struct dls_var_spec **, struct dls_var_spec **);
108 static void dump_fixed_table (const struct dls_var_spec *,
109 const struct file_handle *, int rec_cnt);
110 static void dump_free_table (const struct data_list_pgm *,
111 const struct file_handle *);
112 static void destroy_dls_var_spec (struct dls_var_spec *);
113 static trns_free_func data_list_trns_free;
114 static trns_proc_func data_list_trns_proc;
119 struct data_list_pgm *dls;
120 int table = -1; /* Print table if nonzero, -1=undecided. */
121 struct file_handle *fh = fh_inline_file ();
123 if (!in_input_program () && !in_file_type ())
124 discard_variables ();
126 dls = xmalloc (sizeof *dls);
133 dls->first = dls->last = NULL;
137 if (lex_match_id ("FILE"))
140 fh = fh_parse (FH_REF_FILE | FH_REF_INLINE);
143 if (in_file_type () && fh != fh_get_default_handle ())
145 msg (SE, _("DATA LIST must use the same file "
146 "as the enclosing FILE TYPE."));
150 else if (lex_match_id ("RECORDS"))
154 if (!lex_force_int ())
156 dls->rec_cnt = lex_integer ();
160 else if (lex_match_id ("END"))
164 msg (SE, _("The END subcommand may only be specified once."));
169 if (!lex_force_id ())
171 dls->end = dict_lookup_var (default_dict, tokid);
173 dls->end = dict_create_var_assert (default_dict, tokid, 0);
176 else if (token == T_ID)
178 if (lex_match_id ("NOTABLE"))
180 else if (lex_match_id ("TABLE"))
185 if (lex_match_id ("FIXED"))
187 else if (lex_match_id ("FREE"))
189 else if (lex_match_id ("LIST"))
199 msg (SE, _("Only one of FIXED, FREE, or LIST may "
205 if ((dls->type == DLS_FREE || dls->type == DLS_LIST)
208 while (!lex_match (')'))
212 if (lex_match_id ("TAB"))
214 else if (token == T_STRING && tokstr.length == 1)
216 delim = tokstr.string[0];
225 dls->delims = xrealloc (dls->delims, dls->delim_cnt + 1);
226 dls->delims[dls->delim_cnt++] = delim;
240 dls->case_size = dict_get_case_size (default_dict);
241 fh_set_default_handle (fh);
244 dls->type = DLS_FIXED;
248 if (dls->type == DLS_FREE)
254 if (dls->type == DLS_FIXED)
256 if (!parse_fixed (dls))
259 dump_fixed_table (dls->first, fh, dls->rec_cnt);
263 if (!parse_free (&dls->first, &dls->last))
266 dump_free_table (dls, fh);
269 dls->reader = dfm_open_reader (fh);
270 if (dls->reader == NULL)
273 if (vfm_source != NULL)
274 add_transformation (data_list_trns_proc, data_list_trns_free, dls);
276 vfm_source = create_case_source (&data_list_source_class, dls);
281 data_list_trns_free (dls);
282 return CMD_CASCADING_FAILURE;
285 /* Adds SPEC to the linked list with head at FIRST and tail at
288 append_var_spec (struct dls_var_spec **first, struct dls_var_spec **last,
289 struct dls_var_spec *spec)
296 (*last)->next = spec;
300 /* Fixed-format parsing. */
302 /* Used for chaining together fortran-like format specifiers. */
305 struct fmt_list *next;
308 struct fmt_list *down;
311 /* State of parsing DATA LIST. */
312 struct fixed_parsing_state
314 char **name; /* Variable names. */
315 size_t name_cnt; /* Number of names. */
317 int recno; /* Index of current record. */
318 int sc; /* 1-based column number of starting column for
319 next field to output. */
322 static int fixed_parse_compatible (struct fixed_parsing_state *,
323 struct dls_var_spec **,
324 struct dls_var_spec **);
325 static int fixed_parse_fortran (struct fixed_parsing_state *,
326 struct dls_var_spec **,
327 struct dls_var_spec **);
329 /* Parses all the variable specifications for DATA LIST FIXED,
330 storing them into DLS. Returns nonzero if successful. */
332 parse_fixed (struct data_list_pgm *dls)
334 struct fixed_parsing_state fx;
342 while (lex_match ('/'))
345 if (lex_is_integer ())
347 if (lex_integer () < fx.recno)
349 msg (SE, _("The record number specified, %ld, is "
350 "before the previous record, %d. Data "
351 "fields must be listed in order of "
352 "increasing record number."),
353 lex_integer (), fx.recno - 1);
357 fx.recno = lex_integer ();
363 if (!parse_DATA_LIST_vars (&fx.name, &fx.name_cnt, PV_NONE))
366 if (lex_is_number ())
368 if (!fixed_parse_compatible (&fx, &dls->first, &dls->last))
371 else if (token == '(')
373 if (!fixed_parse_fortran (&fx, &dls->first, &dls->last))
378 msg (SE, _("SPSS-like or FORTRAN-like format "
379 "specification expected after variable names."));
383 for (i = 0; i < fx.name_cnt; i++)
387 if (dls->first == NULL)
389 msg (SE, _("At least one variable must be specified."));
392 if (dls->rec_cnt && dls->last->rec > dls->rec_cnt)
394 msg (SE, _("Variables are specified on records that "
395 "should not exist according to RECORDS subcommand."));
398 else if (!dls->rec_cnt)
399 dls->rec_cnt = dls->last->rec;
400 return lex_end_of_command () == CMD_SUCCESS;
403 for (i = 0; i < fx.name_cnt; i++)
409 /* Parses a variable specification in the form 1-10 (A) based on
410 FX and adds specifications to the linked list with head at
411 FIRST and tail at LAST. */
413 fixed_parse_compatible (struct fixed_parsing_state *fx,
414 struct dls_var_spec **first, struct dls_var_spec **last)
416 struct fmt_spec input;
422 if (!lex_force_int ())
427 msg (SE, _("Column positions for fields must be positive."));
433 lex_negative_to_dash ();
436 if (!lex_force_int ())
441 msg (SE, _("Column positions for fields must be positive."));
446 msg (SE, _("The ending column for a field must be "
447 "greater than the starting column."));
456 /* Divide columns evenly. */
457 input.w = (lc - fc + 1) / fx->name_cnt;
458 if ((lc - fc + 1) % fx->name_cnt)
460 msg (SE, _("The %d columns %d-%d "
461 "can't be evenly divided into %d fields."),
462 lc - fc + 1, fc, lc, fx->name_cnt);
466 /* Format specifier. */
469 struct fmt_desc *fdp;
475 input.type = parse_format_specifier_name (&cp, 0);
476 if (input.type == -1)
480 msg (SE, _("A format specifier on this line "
481 "has extra characters on the end."));
491 if (lex_is_integer ())
493 if (lex_integer () < 1)
495 msg (SE, _("The value for number of decimal places "
496 "must be at least 1."));
500 input.d = lex_integer ();
506 fdp = &formats[input.type];
507 if (fdp->n_args < 2 && input.d)
509 msg (SE, _("Input format %s doesn't accept decimal places."),
517 if (!lex_force_match (')'))
525 if (!check_input_specifier (&input, 1))
528 /* Start column for next specification. */
531 /* Width of variables to create. */
532 if (input.type == FMT_A || input.type == FMT_AHEX)
537 /* Create variables and var specs. */
538 for (i = 0; i < fx->name_cnt; i++)
540 struct dls_var_spec *spec;
543 v = dict_create_var (default_dict, fx->name[i], width);
546 convert_fmt_ItoO (&input, &v->print);
551 v = dict_lookup_var_assert (default_dict, fx->name[i]);
552 if (vfm_source == NULL)
554 msg (SE, _("%s is a duplicate variable name."), fx->name[i]);
557 if ((width != 0) != (v->width != 0))
559 msg (SE, _("There is already a variable %s of a "
564 if (width != 0 && width != v->width)
566 msg (SE, _("There is already a string variable %s of a "
567 "different width."), fx->name[i]);
572 spec = xmalloc (sizeof *spec);
576 spec->rec = fx->recno;
577 spec->fc = fc + input.w * i;
578 spec->lc = spec->fc + input.w - 1;
579 append_var_spec (first, last, spec);
584 /* Destroy format list F and, if RECURSE is nonzero, all its
587 destroy_fmt_list (struct fmt_list *f, int recurse)
589 struct fmt_list *next;
594 if (recurse && f->f.type == FMT_DESCEND)
595 destroy_fmt_list (f->down, 1);
600 /* Takes a hierarchically structured fmt_list F as constructed by
601 fixed_parse_fortran(), and flattens it, adding the variable
602 specifications to the linked list with head FIRST and tail
603 LAST. NAME_IDX is used to take values from the list of names
604 in FX; it should initially point to a value of 0. */
606 dump_fmt_list (struct fixed_parsing_state *fx, struct fmt_list *f,
607 struct dls_var_spec **first, struct dls_var_spec **last,
612 for (; f; f = f->next)
613 if (f->f.type == FMT_X)
615 else if (f->f.type == FMT_T)
617 else if (f->f.type == FMT_NEWREC)
619 fx->recno += f->count;
623 for (i = 0; i < f->count; i++)
624 if (f->f.type == FMT_DESCEND)
626 if (!dump_fmt_list (fx, f->down, first, last, name_idx))
631 struct dls_var_spec *spec;
635 if (formats[f->f.type].cat & FCAT_STRING)
639 if (*name_idx >= fx->name_cnt)
641 msg (SE, _("The number of format "
642 "specifications exceeds the given number of "
647 v = dict_create_var (default_dict, fx->name[(*name_idx)++], width);
650 msg (SE, _("%s is a duplicate variable name."), fx->name[i]);
654 spec = xmalloc (sizeof *spec);
658 spec->rec = fx->recno;
660 spec->lc = fx->sc + f->f.w - 1;
661 append_var_spec (first, last, spec);
663 convert_fmt_ItoO (&spec->input, &v->print);
671 /* Recursively parses a FORTRAN-like format specification into
672 the linked list with head FIRST and tail TAIL. LEVEL is the
673 level of recursion, starting from 0. Returns the parsed
674 specification if successful, or a null pointer on failure. */
675 static struct fmt_list *
676 fixed_parse_fortran_internal (struct fixed_parsing_state *fx,
677 struct dls_var_spec **first,
678 struct dls_var_spec **last)
680 struct fmt_list *head = NULL;
681 struct fmt_list *tail = NULL;
683 lex_force_match ('(');
687 struct fmt_list *new = xmalloc (sizeof *new);
690 /* Append new to list. */
698 if (lex_is_integer ())
700 new->count = lex_integer ();
706 /* Parse format specifier. */
709 new->f.type = FMT_DESCEND;
710 new->down = fixed_parse_fortran_internal (fx, first, last);
711 if (new->down == NULL)
714 else if (lex_match ('/'))
715 new->f.type = FMT_NEWREC;
716 else if (!parse_format_specifier (&new->f, FMTP_ALLOW_XT)
717 || !check_input_specifier (&new->f, 1))
722 lex_force_match (')');
727 destroy_fmt_list (head, 0);
732 /* Parses a FORTRAN-like format specification into the linked
733 list with head FIRST and tail LAST. Returns nonzero if
736 fixed_parse_fortran (struct fixed_parsing_state *fx,
737 struct dls_var_spec **first, struct dls_var_spec **last)
739 struct fmt_list *list;
742 list = fixed_parse_fortran_internal (fx, first, last);
747 dump_fmt_list (fx, list, first, last, &name_idx);
748 destroy_fmt_list (list, 1);
749 if (name_idx < fx->name_cnt)
751 msg (SE, _("There aren't enough format specifications "
752 "to match the number of variable names given."));
759 /* Displays a table giving information on fixed-format variable
760 parsing on DATA LIST. */
761 /* FIXME: The `Columns' column should be divided into three columns,
762 one for the starting column, one for the dash, one for the ending
763 column; then right-justify the starting column and left-justify the
766 dump_fixed_table (const struct dls_var_spec *specs,
767 const struct file_handle *fh, int rec_cnt)
769 const struct dls_var_spec *spec;
773 for (i = 0, spec = specs; spec; spec = spec->next)
775 t = tab_create (4, i + 1, 0);
776 tab_columns (t, TAB_COL_DOWN, 1);
777 tab_headers (t, 0, 0, 1, 0);
778 tab_text (t, 0, 0, TAB_CENTER | TAT_TITLE, _("Variable"));
779 tab_text (t, 1, 0, TAB_CENTER | TAT_TITLE, _("Record"));
780 tab_text (t, 2, 0, TAB_CENTER | TAT_TITLE, _("Columns"));
781 tab_text (t, 3, 0, TAB_CENTER | TAT_TITLE, _("Format"));
782 tab_box (t, TAL_1, TAL_1, TAL_0, TAL_1, 0, 0, 3, i);
783 tab_hline (t, TAL_2, 0, 3, 1);
784 tab_dim (t, tab_natural_dimensions);
786 for (i = 1, spec = specs; spec; spec = spec->next, i++)
788 tab_text (t, 0, i, TAB_LEFT, spec->v->name);
789 tab_text (t, 1, i, TAT_PRINTF, "%d", spec->rec);
790 tab_text (t, 2, i, TAT_PRINTF, "%3d-%3d",
792 tab_text (t, 3, i, TAB_LEFT | TAB_FIX,
793 fmt_to_string (&spec->input));
796 tab_title (t, ngettext ("Reading %d record from %s.",
797 "Reading %d records from %s.", rec_cnt),
798 rec_cnt, fh_get_name (fh));
802 /* Free-format parsing. */
804 /* Parses variable specifications for DATA LIST FREE and adds
805 them to the linked list with head FIRST and tail LAST.
806 Returns nonzero only if successful. */
808 parse_free (struct dls_var_spec **first, struct dls_var_spec **last)
813 struct fmt_spec input, output;
819 if (!parse_DATA_LIST_vars (&name, &name_cnt, PV_NONE))
824 if (!parse_format_specifier (&input, 0)
825 || !check_input_specifier (&input, 1)
826 || !lex_force_match (')'))
828 for (i = 0; i < name_cnt; i++)
833 convert_fmt_ItoO (&input, &output);
838 input = make_input_format (FMT_F, 8, 0);
839 output = *get_format ();
842 if (input.type == FMT_A || input.type == FMT_AHEX)
846 for (i = 0; i < name_cnt; i++)
848 struct dls_var_spec *spec;
851 v = dict_create_var (default_dict, name[i], width);
855 msg (SE, _("%s is a duplicate variable name."), name[i]);
858 v->print = v->write = output;
860 spec = xmalloc (sizeof *spec);
864 str_copy_trunc (spec->name, sizeof spec->name, v->name);
865 append_var_spec (first, last, spec);
867 for (i = 0; i < name_cnt; i++)
872 return lex_end_of_command () == CMD_SUCCESS;
875 /* Displays a table giving information on free-format variable parsing
878 dump_free_table (const struct data_list_pgm *dls,
879 const struct file_handle *fh)
885 struct dls_var_spec *spec;
886 for (i = 0, spec = dls->first; spec; spec = spec->next)
890 t = tab_create (2, i + 1, 0);
891 tab_columns (t, TAB_COL_DOWN, 1);
892 tab_headers (t, 0, 0, 1, 0);
893 tab_text (t, 0, 0, TAB_CENTER | TAT_TITLE, _("Variable"));
894 tab_text (t, 1, 0, TAB_CENTER | TAT_TITLE, _("Format"));
895 tab_box (t, TAL_1, TAL_1, TAL_0, TAL_1, 0, 0, 1, i);
896 tab_hline (t, TAL_2, 0, 1, 1);
897 tab_dim (t, tab_natural_dimensions);
900 struct dls_var_spec *spec;
902 for (i = 1, spec = dls->first; spec; spec = spec->next, i++)
904 tab_text (t, 0, i, TAB_LEFT, spec->v->name);
905 tab_text (t, 1, i, TAB_LEFT | TAB_FIX, fmt_to_string (&spec->input));
909 tab_title (t, _("Reading free-form data from %s."), fh_get_name (fh));
914 /* Input procedure. */
916 /* Extracts a field from the current position in the current
917 record. Fields can be unquoted or quoted with single- or
918 double-quote characters. *FIELD is set to the field content.
919 After parsing the field, sets the current position in the
920 record to just past the field and any trailing delimiter.
921 END_BLANK is used internally; it should be initialized by the
922 caller to 0 and left alone afterward. Returns 0 on failure or
923 a 1-based column number indicating the beginning of the field
926 cut_field (const struct data_list_pgm *dls, struct fixed_string *field,
929 struct fixed_string line;
933 if (dfm_eof (dls->reader))
935 if (dls->delim_cnt == 0)
936 dfm_expand_tabs (dls->reader);
937 dfm_get_record (dls->reader, &line);
939 cp = ls_c_str (&line);
940 if (dls->delim_cnt == 0)
942 /* Skip leading whitespace. */
943 while (cp < ls_end (&line) && isspace ((unsigned char) *cp))
945 if (cp >= ls_end (&line))
948 /* Handle actual data, whether quoted or unquoted. */
949 if (*cp == '\'' || *cp == '"')
953 field->string = ++cp;
954 while (cp < ls_end (&line) && *cp != quote)
956 field->length = cp - field->string;
957 if (cp < ls_end (&line))
960 msg (SW, _("Quoted string missing terminating `%c'."), quote);
965 while (cp < ls_end (&line)
966 && !isspace ((unsigned char) *cp) && *cp != ',')
968 field->length = cp - field->string;
971 /* Skip trailing whitespace and a single comma if present. */
972 while (cp < ls_end (&line) && isspace ((unsigned char) *cp))
974 if (cp < ls_end (&line) && *cp == ',')
979 if (cp >= ls_end (&line))
981 int column = dfm_column_start (dls->reader);
982 /* A blank line or a line that ends in \t has a
983 trailing blank field. */
984 if (column == 1 || (column > 1 && cp[-1] == '\t'))
989 field->string = ls_end (&line);
991 dfm_forward_record (dls->reader);
1006 while (cp < ls_end (&line)
1007 && memchr (dls->delims, *cp, dls->delim_cnt) == NULL)
1009 field->length = cp - field->string;
1010 if (cp < ls_end (&line))
1015 dfm_forward_columns (dls->reader, field->string - line.string);
1016 column_start = dfm_column_start (dls->reader);
1018 dfm_forward_columns (dls->reader, cp - field->string);
1020 return column_start;
1023 static bool read_from_data_list_fixed (const struct data_list_pgm *,
1025 static bool read_from_data_list_free (const struct data_list_pgm *,
1027 static bool read_from_data_list_list (const struct data_list_pgm *,
1030 /* Reads a case from DLS into C.
1031 Returns true if successful, false at end of file or on I/O error. */
1033 read_from_data_list (const struct data_list_pgm *dls, struct ccase *c)
1037 dfm_push (dls->reader);
1041 retval = read_from_data_list_fixed (dls, c);
1044 retval = read_from_data_list_free (dls, c);
1047 retval = read_from_data_list_list (dls, c);
1052 dfm_pop (dls->reader);
1057 /* Reads a case from the data file into C, parsing it according
1058 to fixed-format syntax rules in DLS.
1059 Returns true if successful, false at end of file or on I/O error. */
1061 read_from_data_list_fixed (const struct data_list_pgm *dls, struct ccase *c)
1063 struct dls_var_spec *var_spec = dls->first;
1066 if (dfm_eof (dls->reader))
1068 for (i = 1; i <= dls->rec_cnt; i++)
1070 struct fixed_string line;
1072 if (dfm_eof (dls->reader))
1074 /* Note that this can't occur on the first record. */
1075 msg (SW, _("Partial case of %d of %d records discarded."),
1076 i - 1, dls->rec_cnt);
1079 dfm_expand_tabs (dls->reader);
1080 dfm_get_record (dls->reader, &line);
1082 for (; var_spec && i == var_spec->rec; var_spec = var_spec->next)
1086 data_in_finite_line (&di, ls_c_str (&line), ls_length (&line),
1087 var_spec->fc, var_spec->lc);
1088 di.v = case_data_rw (c, var_spec->fv);
1089 di.flags = DI_IMPLIED_DECIMALS;
1090 di.f1 = var_spec->fc;
1091 di.format = var_spec->input;
1096 dfm_forward_record (dls->reader);
1102 /* Reads a case from the data file into C, parsing it according
1103 to free-format syntax rules in DLS.
1104 Returns true if successful, false at end of file or on I/O error. */
1106 read_from_data_list_free (const struct data_list_pgm *dls, struct ccase *c)
1108 struct dls_var_spec *var_spec;
1111 for (var_spec = dls->first; var_spec; var_spec = var_spec->next)
1113 struct fixed_string field;
1116 /* Cut out a field and read in a new record if necessary. */
1119 column = cut_field (dls, &field, &end_blank);
1123 if (!dfm_eof (dls->reader))
1124 dfm_forward_record (dls->reader);
1125 if (dfm_eof (dls->reader))
1127 if (var_spec != dls->first)
1128 msg (SW, _("Partial case discarded. The first variable "
1129 "missing was %s."), var_spec->name);
1137 di.s = ls_c_str (&field);
1138 di.e = ls_end (&field);
1139 di.v = case_data_rw (c, var_spec->fv);
1142 di.format = var_spec->input;
1149 /* Reads a case from the data file and parses it according to
1150 list-format syntax rules.
1151 Returns true if successful, false at end of file or on I/O error. */
1153 read_from_data_list_list (const struct data_list_pgm *dls, struct ccase *c)
1155 struct dls_var_spec *var_spec;
1158 if (dfm_eof (dls->reader))
1161 for (var_spec = dls->first; var_spec; var_spec = var_spec->next)
1163 struct fixed_string field;
1166 /* Cut out a field and check for end-of-line. */
1167 column = cut_field (dls, &field, &end_blank);
1170 if (get_undefined ())
1171 msg (SW, _("Missing value(s) for all variables from %s onward. "
1172 "These will be filled with the system-missing value "
1173 "or blanks, as appropriate."),
1175 for (; var_spec; var_spec = var_spec->next)
1177 int width = get_format_var_width (&var_spec->input);
1179 case_data_rw (c, var_spec->fv)->f = SYSMIS;
1181 memset (case_data_rw (c, var_spec->fv)->s, ' ', width);
1189 di.s = ls_c_str (&field);
1190 di.e = ls_end (&field);
1191 di.v = case_data_rw (c, var_spec->fv);
1194 di.format = var_spec->input;
1199 dfm_forward_record (dls->reader);
1203 /* Destroys SPEC. */
1205 destroy_dls_var_spec (struct dls_var_spec *spec)
1207 struct dls_var_spec *next;
1209 while (spec != NULL)
1217 /* Destroys DATA LIST transformation DLS.
1218 Returns true if successful, false if an I/O error occurred. */
1220 data_list_trns_free (void *dls_)
1222 struct data_list_pgm *dls = dls_;
1224 destroy_dls_var_spec (dls->first);
1225 dfm_close_reader (dls->reader);
1230 /* Handle DATA LIST transformation DLS, parsing data into C. */
1232 data_list_trns_proc (void *dls_, struct ccase *c, int case_num UNUSED)
1234 struct data_list_pgm *dls = dls_;
1237 if (read_from_data_list (dls, c))
1238 retval = TRNS_CONTINUE;
1239 else if (dfm_reader_error (dls->reader) || dfm_eof (dls->reader) > 1)
1241 /* An I/O error, or encountering end of file for a second
1242 time, should be escalated into a more serious error. */
1243 retval = TRNS_ERROR;
1246 retval = TRNS_DROP_CASE;
1248 /* If there was an END subcommand handle it. */
1249 if (dls->end != NULL)
1251 double *end = &case_data_rw (c, dls->end->fv)->f;
1252 if (retval == TRNS_DROP_CASE)
1255 retval = TRNS_CONTINUE;
1264 /* Reads all the records from the data file and passes them to
1266 Returns true if successful, false if an I/O error occurred. */
1268 data_list_source_read (struct case_source *source,
1270 write_case_func *write_case, write_case_data wc_data)
1272 struct data_list_pgm *dls = source->aux;
1278 if (!read_from_data_list (dls, c))
1279 return !dfm_reader_error (dls->reader);
1281 dfm_push (dls->reader);
1282 ok = write_case (wc_data);
1283 dfm_pop (dls->reader);
1289 /* Destroys the source's internal data. */
1291 data_list_source_destroy (struct case_source *source)
1293 data_list_trns_free (source->aux);
1296 static const struct case_source_class data_list_source_class =
1300 data_list_source_read,
1301 data_list_source_destroy,
1304 /* REPEATING DATA. */
1306 /* Represents a number or a variable. */
1307 struct rpd_num_or_var
1309 int num; /* Value, or 0. */
1310 struct variable *var; /* Variable, if number==0. */
1313 /* REPEATING DATA private data structure. */
1314 struct repeating_data_trns
1316 struct dls_var_spec *first, *last; /* Variable parsing specifications. */
1317 struct dfm_reader *reader; /* Input file, never NULL. */
1319 struct rpd_num_or_var starts_beg; /* STARTS=, before the dash. */
1320 struct rpd_num_or_var starts_end; /* STARTS=, after the dash. */
1321 struct rpd_num_or_var occurs; /* OCCURS= subcommand. */
1322 struct rpd_num_or_var length; /* LENGTH= subcommand. */
1323 struct rpd_num_or_var cont_beg; /* CONTINUED=, before the dash. */
1324 struct rpd_num_or_var cont_end; /* CONTINUED=, after the dash. */
1326 /* ID subcommand. */
1327 int id_beg, id_end; /* Beginning & end columns. */
1328 struct variable *id_var; /* DATA LIST variable. */
1329 struct fmt_spec id_spec; /* Input format spec. */
1330 union value *id_value; /* ID value. */
1332 write_case_func *write_case;
1333 write_case_data wc_data;
1336 static trns_free_func repeating_data_trns_free;
1337 static int parse_num_or_var (struct rpd_num_or_var *, const char *);
1338 static int parse_repeating_data (struct dls_var_spec **,
1339 struct dls_var_spec **);
1340 static void find_variable_input_spec (struct variable *v,
1341 struct fmt_spec *spec);
1343 int cmd_repeating_data (void);
1345 /* Parses the REPEATING DATA command. */
1347 cmd_repeating_data (void)
1349 struct repeating_data_trns *rpd;
1350 int table = 1; /* Print table? */
1351 bool saw_starts = false; /* Saw STARTS subcommand? */
1352 bool saw_occurs = false; /* Saw OCCURS subcommand? */
1353 bool saw_length = false; /* Saw LENGTH subcommand? */
1354 bool saw_continued = false; /* Saw CONTINUED subcommand? */
1355 bool saw_id = false; /* Saw ID subcommand? */
1356 struct file_handle *const fh = fh_get_default_handle ();
1358 assert (in_input_program () || in_file_type ());
1360 rpd = xmalloc (sizeof *rpd);
1361 rpd->reader = dfm_open_reader (fh);
1362 rpd->first = rpd->last = NULL;
1363 rpd->starts_beg.num = 0;
1364 rpd->starts_beg.var = NULL;
1365 rpd->starts_end = rpd->occurs = rpd->length = rpd->cont_beg
1366 = rpd->cont_end = rpd->starts_beg;
1367 rpd->id_beg = rpd->id_end = 0;
1369 rpd->id_value = NULL;
1375 if (lex_match_id ("FILE"))
1377 struct file_handle *file;
1379 file = fh_parse (FH_REF_FILE | FH_REF_INLINE);
1384 msg (SE, _("REPEATING DATA must use the same file as its "
1385 "corresponding DATA LIST or FILE TYPE."));
1389 else if (lex_match_id ("STARTS"))
1394 msg (SE, _("%s subcommand given multiple times."),"STARTS");
1399 if (!parse_num_or_var (&rpd->starts_beg, "STARTS beginning column"))
1402 lex_negative_to_dash ();
1403 if (lex_match ('-'))
1405 if (!parse_num_or_var (&rpd->starts_end, "STARTS ending column"))
1408 /* Otherwise, rpd->starts_end is uninitialized. We
1409 will initialize it later from the record length
1410 of the file. We can't do so now because the
1411 file handle may not be specified yet. */
1414 if (rpd->starts_beg.num != 0 && rpd->starts_end.num != 0
1415 && rpd->starts_beg.num > rpd->starts_end.num)
1417 msg (SE, _("STARTS beginning column (%d) exceeds "
1418 "STARTS ending column (%d)."),
1419 rpd->starts_beg.num, rpd->starts_end.num);
1423 else if (lex_match_id ("OCCURS"))
1428 msg (SE, _("%s subcommand given multiple times."),"OCCURS");
1433 if (!parse_num_or_var (&rpd->occurs, "OCCURS"))
1436 else if (lex_match_id ("LENGTH"))
1441 msg (SE, _("%s subcommand given multiple times."),"LENGTH");
1446 if (!parse_num_or_var (&rpd->length, "LENGTH"))
1449 else if (lex_match_id ("CONTINUED"))
1454 msg (SE, _("%s subcommand given multiple times."),"CONTINUED");
1457 saw_continued = true;
1459 if (!lex_match ('/'))
1461 if (!parse_num_or_var (&rpd->cont_beg,
1462 "CONTINUED beginning column"))
1465 lex_negative_to_dash ();
1467 && !parse_num_or_var (&rpd->cont_end,
1468 "CONTINUED ending column"))
1471 if (rpd->cont_beg.num != 0 && rpd->cont_end.num != 0
1472 && rpd->cont_beg.num > rpd->cont_end.num)
1474 msg (SE, _("CONTINUED beginning column (%d) exceeds "
1475 "CONTINUED ending column (%d)."),
1476 rpd->cont_beg.num, rpd->cont_end.num);
1481 rpd->cont_beg.num = 1;
1483 else if (lex_match_id ("ID"))
1488 msg (SE, _("%s subcommand given multiple times."),"ID");
1493 if (!lex_force_int ())
1495 if (lex_integer () < 1)
1497 msg (SE, _("ID beginning column (%ld) must be positive."),
1501 rpd->id_beg = lex_integer ();
1504 lex_negative_to_dash ();
1506 if (lex_match ('-'))
1508 if (!lex_force_int ())
1510 if (lex_integer () < 1)
1512 msg (SE, _("ID ending column (%ld) must be positive."),
1516 if (lex_integer () < rpd->id_end)
1518 msg (SE, _("ID ending column (%ld) cannot be less than "
1519 "ID beginning column (%d)."),
1520 lex_integer (), rpd->id_beg);
1524 rpd->id_end = lex_integer ();
1527 else rpd->id_end = rpd->id_beg;
1529 if (!lex_force_match ('='))
1531 rpd->id_var = parse_variable ();
1532 if (rpd->id_var == NULL)
1535 find_variable_input_spec (rpd->id_var, &rpd->id_spec);
1536 rpd->id_value = xnmalloc (rpd->id_var->nv, sizeof *rpd->id_value);
1538 else if (lex_match_id ("TABLE"))
1540 else if (lex_match_id ("NOTABLE"))
1542 else if (lex_match_id ("DATA"))
1550 if (!lex_force_match ('/'))
1554 /* Comes here when DATA specification encountered. */
1555 if (!saw_starts || !saw_occurs)
1558 msg (SE, _("Missing required specification STARTS."));
1560 msg (SE, _("Missing required specification OCCURS."));
1564 /* Enforce ID restriction. */
1565 if (saw_id && !saw_continued)
1567 msg (SE, _("ID specified without CONTINUED."));
1571 /* Calculate and check starts_end, cont_end if necessary. */
1572 if (rpd->starts_end.num == 0 && rpd->starts_end.var == NULL)
1574 rpd->starts_end.num = fh_get_record_width (fh);
1575 if (rpd->starts_beg.num != 0
1576 && rpd->starts_beg.num > rpd->starts_end.num)
1578 msg (SE, _("STARTS beginning column (%d) exceeds "
1579 "default STARTS ending column taken from file's "
1580 "record width (%d)."),
1581 rpd->starts_beg.num, rpd->starts_end.num);
1585 if (rpd->cont_end.num == 0 && rpd->cont_end.var == NULL)
1587 rpd->cont_end.num = fh_get_record_width (fh);
1588 if (rpd->cont_beg.num != 0
1589 && rpd->cont_beg.num > rpd->cont_end.num)
1591 msg (SE, _("CONTINUED beginning column (%d) exceeds "
1592 "default CONTINUED ending column taken from file's "
1593 "record width (%d)."),
1594 rpd->cont_beg.num, rpd->cont_end.num);
1600 if (!parse_repeating_data (&rpd->first, &rpd->last))
1603 /* Calculate length if necessary. */
1606 struct dls_var_spec *iter;
1608 for (iter = rpd->first; iter; iter = iter->next)
1609 if (iter->lc > rpd->length.num)
1610 rpd->length.num = iter->lc;
1611 assert (rpd->length.num != 0);
1615 dump_fixed_table (rpd->first, fh, rpd->last->rec);
1617 add_transformation (repeating_data_trns_proc, repeating_data_trns_free, rpd);
1619 return lex_end_of_command ();
1622 repeating_data_trns_free (rpd);
1623 return CMD_CASCADING_FAILURE;
1626 /* Finds the input format specification for variable V and puts
1627 it in SPEC. Because of the way that DATA LIST is structured,
1628 this is nontrivial. */
1630 find_variable_input_spec (struct variable *v, struct fmt_spec *spec)
1634 for (i = 0; i < n_trns; i++)
1636 struct transformation *trns = &t_trns[i];
1638 if (trns->proc == data_list_trns_proc)
1640 struct data_list_pgm *pgm = trns->private;
1641 struct dls_var_spec *iter;
1643 for (iter = pgm->first; iter; iter = iter->next)
1646 *spec = iter->input;
1655 /* Parses a number or a variable name from the syntax file and puts
1656 the results in VALUE. Ensures that the number is at least 1; else
1657 emits an error based on MESSAGE. Returns nonzero only if
1660 parse_num_or_var (struct rpd_num_or_var *value, const char *message)
1665 value->var = parse_variable ();
1666 if (value->var == NULL)
1668 if (value->var->type == ALPHA)
1670 msg (SE, _("String variable not allowed here."));
1674 else if (lex_is_integer ())
1676 value->num = lex_integer ();
1680 msg (SE, _("%s (%d) must be at least 1."), message, value->num);
1686 msg (SE, _("Variable or integer expected for %s."), message);
1692 /* Parses data specifications for repeating data groups, adding
1693 them to the linked list with head FIRST and tail LAST.
1694 Returns nonzero only if successful. */
1696 parse_repeating_data (struct dls_var_spec **first, struct dls_var_spec **last)
1698 struct fixed_parsing_state fx;
1704 while (token != '.')
1706 if (!parse_DATA_LIST_vars (&fx.name, &fx.name_cnt, PV_NONE))
1709 if (lex_is_number ())
1711 if (!fixed_parse_compatible (&fx, first, last))
1714 else if (token == '(')
1716 if (!fixed_parse_fortran (&fx, first, last))
1721 msg (SE, _("SPSS-like or FORTRAN-like format "
1722 "specification expected after variable names."));
1726 for (i = 0; i < fx.name_cnt; i++)
1734 for (i = 0; i < fx.name_cnt; i++)
1740 /* Obtains the real value for rpd_num_or_var N in case C and returns
1741 it. The valid range is nonnegative numbers, but numbers outside
1742 this range can be returned and should be handled by the caller as
1745 realize_value (struct rpd_num_or_var *n, struct ccase *c)
1749 double v = case_num (c, n->var->fv);
1750 return v != SYSMIS && v >= INT_MIN && v <= INT_MAX ? v : -1;
1756 /* Parameter record passed to rpd_parse_record(). */
1757 struct rpd_parse_info
1759 struct repeating_data_trns *trns; /* REPEATING DATA transformation. */
1760 const char *line; /* Line being parsed. */
1761 size_t len; /* Line length. */
1762 int beg, end; /* First and last column of first occurrence. */
1763 int ofs; /* Column offset between repeated occurrences. */
1764 struct ccase *c; /* Case to fill in. */
1765 int verify_id; /* Zero to initialize ID, nonzero to verify it. */
1766 int max_occurs; /* Max number of occurrences to parse. */
1769 /* Parses one record of repeated data and outputs corresponding
1770 cases. Returns number of occurrences parsed up to the
1771 maximum specified in INFO. */
1773 rpd_parse_record (const struct rpd_parse_info *info)
1775 struct repeating_data_trns *t = info->trns;
1776 int cur = info->beg;
1779 /* Handle record ID values. */
1782 union value id_temp[MAX_ELEMS_PER_VALUE];
1784 /* Parse record ID into V. */
1788 data_in_finite_line (&di, info->line, info->len, t->id_beg, t->id_end);
1789 di.v = info->verify_id ? id_temp : t->id_value;
1792 di.format = t->id_spec;
1799 && compare_values (id_temp, t->id_value, t->id_var->width) != 0)
1801 char expected_str [MAX_FORMATTED_LEN + 1];
1802 char actual_str [MAX_FORMATTED_LEN + 1];
1804 data_out (expected_str, &t->id_var->print, t->id_value);
1805 expected_str[t->id_var->print.w] = '\0';
1807 data_out (actual_str, &t->id_var->print, id_temp);
1808 actual_str[t->id_var->print.w] = '\0';
1811 _("Encountered mismatched record ID \"%s\" "
1812 "expecting \"%s\"."),
1813 actual_str, expected_str);
1819 /* Iterate over the set of expected occurrences and record each of
1820 them as a separate case. FIXME: We need to execute any
1821 transformations that follow the current one. */
1825 for (occurrences = 0; occurrences < info->max_occurs; )
1827 if (cur + info->ofs > info->end + 1)
1832 struct dls_var_spec *var_spec = t->first;
1834 for (; var_spec; var_spec = var_spec->next)
1836 int fc = var_spec->fc - 1 + cur;
1837 int lc = var_spec->lc - 1 + cur;
1839 if (fc > info->len && !warned && var_spec->input.type != FMT_A)
1844 _("Variable %s starting in column %d extends "
1845 "beyond physical record length of %d."),
1846 var_spec->v->name, fc, info->len);
1852 data_in_finite_line (&di, info->line, info->len, fc, lc);
1853 di.v = case_data_rw (info->c, var_spec->fv);
1856 di.format = var_spec->input;
1866 if (!t->write_case (t->wc_data))
1874 /* Reads one set of repetitions of the elements in the REPEATING
1875 DATA structure. Returns TRNS_CONTINUE on success,
1876 TRNS_DROP_CASE on end of file or on failure, or TRNS_NEXT_CASE. */
1878 repeating_data_trns_proc (void *trns_, struct ccase *c, int case_num UNUSED)
1880 struct repeating_data_trns *t = trns_;
1882 struct fixed_string line; /* Current record. */
1884 int starts_beg; /* Starting column. */
1885 int starts_end; /* Ending column. */
1886 int occurs; /* Number of repetitions. */
1887 int length; /* Length of each occurrence. */
1888 int cont_beg; /* Starting column for continuation lines. */
1889 int cont_end; /* Ending column for continuation lines. */
1891 int occurs_left; /* Number of occurrences remaining. */
1893 int code; /* Return value from rpd_parse_record(). */
1895 int skip_first_record = 0;
1897 dfm_push (t->reader);
1899 /* Read the current record. */
1900 dfm_reread_record (t->reader, 1);
1901 dfm_expand_tabs (t->reader);
1902 if (dfm_eof (t->reader))
1903 return TRNS_DROP_CASE;
1904 dfm_get_record (t->reader, &line);
1905 dfm_forward_record (t->reader);
1907 /* Calculate occurs, length. */
1908 occurs_left = occurs = realize_value (&t->occurs, c);
1911 rpd_msg (SE, _("Invalid value %d for OCCURS."), occurs);
1912 return TRNS_NEXT_CASE;
1914 starts_beg = realize_value (&t->starts_beg, c);
1915 if (starts_beg <= 0)
1917 rpd_msg (SE, _("Beginning column for STARTS (%d) must be at least 1."),
1919 return TRNS_NEXT_CASE;
1921 starts_end = realize_value (&t->starts_end, c);
1922 if (starts_end < starts_beg)
1924 rpd_msg (SE, _("Ending column for STARTS (%d) is less than "
1925 "beginning column (%d)."),
1926 starts_end, starts_beg);
1927 skip_first_record = 1;
1929 length = realize_value (&t->length, c);
1932 rpd_msg (SE, _("Invalid value %d for LENGTH."), length);
1934 occurs = occurs_left = 1;
1936 cont_beg = realize_value (&t->cont_beg, c);
1939 rpd_msg (SE, _("Beginning column for CONTINUED (%d) must be "
1942 return TRNS_DROP_CASE;
1944 cont_end = realize_value (&t->cont_end, c);
1945 if (cont_end < cont_beg)
1947 rpd_msg (SE, _("Ending column for CONTINUED (%d) is less than "
1948 "beginning column (%d)."),
1949 cont_end, cont_beg);
1950 return TRNS_DROP_CASE;
1953 /* Parse the first record. */
1954 if (!skip_first_record)
1956 struct rpd_parse_info info;
1958 info.line = ls_c_str (&line);
1959 info.len = ls_length (&line);
1960 info.beg = starts_beg;
1961 info.end = starts_end;
1965 info.max_occurs = occurs_left;
1966 code = rpd_parse_record (&info);
1968 return TRNS_DROP_CASE;
1969 occurs_left -= code;
1971 else if (cont_beg == 0)
1972 return TRNS_NEXT_CASE;
1974 /* Make sure, if some occurrences are left, that we have
1975 continuation records. */
1976 if (occurs_left > 0 && cont_beg == 0)
1979 _("Number of repetitions specified on OCCURS (%d) "
1980 "exceed number of repetitions available in "
1981 "space on STARTS (%d), and CONTINUED not specified."),
1982 occurs, (starts_end - starts_beg + 1) / length);
1983 return TRNS_DROP_CASE;
1986 /* Go on to additional records. */
1987 while (occurs_left != 0)
1989 struct rpd_parse_info info;
1991 assert (occurs_left >= 0);
1993 /* Read in another record. */
1994 if (dfm_eof (t->reader))
1997 _("Unexpected end of file with %d repetitions "
1998 "remaining out of %d."),
1999 occurs_left, occurs);
2000 return TRNS_DROP_CASE;
2002 dfm_expand_tabs (t->reader);
2003 dfm_get_record (t->reader, &line);
2004 dfm_forward_record (t->reader);
2006 /* Parse this record. */
2008 info.line = ls_c_str (&line);
2009 info.len = ls_length (&line);
2010 info.beg = cont_beg;
2011 info.end = cont_end;
2015 info.max_occurs = occurs_left;
2016 code = rpd_parse_record (&info);;
2018 return TRNS_DROP_CASE;
2019 occurs_left -= code;
2022 dfm_pop (t->reader);
2024 /* FIXME: This is a kluge until we've implemented multiplexing of
2026 return TRNS_NEXT_CASE;
2029 /* Frees a REPEATING DATA transformation.
2030 Returns true if successful, false if an I/O error occurred. */
2032 repeating_data_trns_free (void *rpd_)
2034 struct repeating_data_trns *rpd = rpd_;
2036 destroy_dls_var_spec (rpd->first);
2037 dfm_close_reader (rpd->reader);
2038 free (rpd->id_value);
2043 /* Lets repeating_data_trns_proc() know how to write the cases
2044 that it composes. Not elegant. */
2046 repeating_data_set_write_case (struct transformation *trns_,
2047 write_case_func *write_case,
2048 write_case_data wc_data)
2050 struct repeating_data_trns *t = trns_->private;
2052 assert (trns_->proc == repeating_data_trns_proc);
2053 t->write_case = write_case;
2054 t->wc_data = wc_data;
2057 /* Reports a message in CLASS with the given FORMAT as text,
2058 prefixing the message with "REPEATING DATA: " to make the
2061 rpd_msg (enum msg_class class, const char *format, ...)
2067 ds_create (&text, "REPEATING DATA: ");
2068 va_start (args, format);
2069 ds_vprintf (&text, format, args);
2072 m.category = msg_class_to_category (class);
2073 m.severity = msg_class_to_severity (class);
2074 m.where.file_name = NULL;
2075 m.where.line_number = 0;
2076 m.text = ds_c_str (&text);