1 /* PSPP - computes sample statistics.
2 Copyright (C) 1997-9, 2000, 2006 Free Software Foundation, Inc.
3 Written by Ben Pfaff <blp@gnu.org>.
5 This program is free software; you can redistribute it and/or
6 modify it under the terms of the GNU General Public License as
7 published by the Free Software Foundation; either version 2 of the
8 License, or (at your option) any later version.
10 This program is distributed in the hope that it will be useful, but
11 WITHOUT ANY WARRANTY; without even the implied warranty of
12 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
13 General Public License for more details.
15 You should have received a copy of the GNU General Public License
16 along with this program; if not, write to the Free Software
17 Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
21 #include <language/data-io/data-list.h>
22 #include <libpspp/message.h>
27 #include <libpspp/alloc.h>
28 #include <data/case.h>
29 #include <language/command.h>
30 #include <libpspp/compiler.h>
31 #include <data/data-in.h>
32 #include <language/data-io/data-reader.h>
33 #include <data/dictionary.h>
34 #include <libpspp/message.h>
35 #include <language/data-io/file-handle.h>
36 #include <data/format.h>
37 #include <language/lexer/lexer.h>
38 #include <libpspp/misc.h>
39 #include <data/settings.h>
40 #include <libpspp/str.h>
41 #include <output/table.h>
42 #include <data/variable.h>
43 #include <procedure.h>
45 #include "data-list.h"
48 #define _(msgid) gettext (msgid)
50 /* Utility function. */
52 /* FIXME: Either REPEATING DATA must be the last transformation, or we
53 must multiplex the transformations that follow (i.e., perform them
54 for every case that we produce from a repetition instance).
55 Currently we do neither. We should do one or the other. */
57 /* Describes how to parse one variable. */
60 struct dls_var_spec *next; /* Next specification in list. */
62 /* Both free and fixed formats. */
63 struct fmt_spec input; /* Input format of this field. */
64 struct variable *v; /* Associated variable. Used only in
65 parsing. Not safe later. */
66 int fv; /* First value in case. */
68 /* Fixed format only. */
69 int rec; /* Record number (1-based). */
70 int fc, lc; /* Column numbers in record. */
72 /* Free format only. */
73 char name[LONG_NAME_LEN + 1]; /* Name of variable. */
76 /* Constants for DATA LIST type. */
77 /* Must match table in cmd_data_list(). */
85 /* DATA LIST private data structure. */
88 struct dls_var_spec *first, *last; /* Variable parsing specifications. */
89 struct dfm_reader *reader; /* Data file reader. */
91 int type; /* A DLS_* constant. */
92 struct variable *end; /* Variable specified on END subcommand. */
93 int rec_cnt; /* Number of records. */
94 size_t case_size; /* Case size in bytes. */
95 char *delims; /* Delimiters if any; not null-terminated. */
96 size_t delim_cnt; /* Number of delimiter, or 0 for spaces. */
99 static const struct case_source_class data_list_source_class;
101 static void rpd_msg (enum msg_class, const char *format, ...);
102 static int parse_fixed (struct data_list_pgm *);
103 static int parse_free (struct dls_var_spec **, struct dls_var_spec **);
104 static void dump_fixed_table (const struct dls_var_spec *,
105 const struct file_handle *, int rec_cnt);
106 static void dump_free_table (const struct data_list_pgm *,
107 const struct file_handle *);
108 static void destroy_dls_var_spec (struct dls_var_spec *);
109 static trns_free_func data_list_trns_free;
110 static trns_proc_func data_list_trns_proc;
115 struct data_list_pgm *dls;
116 int table = -1; /* Print table if nonzero, -1=undecided. */
117 struct file_handle *fh = fh_inline_file ();
119 if (!case_source_is_complex (vfm_source))
120 discard_variables ();
122 dls = xmalloc (sizeof *dls);
129 dls->first = dls->last = NULL;
133 if (lex_match_id ("FILE"))
136 fh = fh_parse (FH_REF_FILE | FH_REF_INLINE);
139 if (case_source_is_class (vfm_source, &file_type_source_class)
140 && fh != fh_get_default_handle ())
142 msg (SE, _("DATA LIST must use the same file "
143 "as the enclosing FILE TYPE."));
147 else if (lex_match_id ("RECORDS"))
151 if (!lex_force_int ())
153 dls->rec_cnt = lex_integer ();
157 else if (lex_match_id ("END"))
161 msg (SE, _("The END subcommand may only be specified once."));
166 if (!lex_force_id ())
168 dls->end = dict_lookup_var (default_dict, tokid);
170 dls->end = dict_create_var_assert (default_dict, tokid, 0);
173 else if (token == T_ID)
175 if (lex_match_id ("NOTABLE"))
177 else if (lex_match_id ("TABLE"))
182 if (lex_match_id ("FIXED"))
184 else if (lex_match_id ("FREE"))
186 else if (lex_match_id ("LIST"))
196 msg (SE, _("Only one of FIXED, FREE, or LIST may "
202 if ((dls->type == DLS_FREE || dls->type == DLS_LIST)
205 while (!lex_match (')'))
209 if (lex_match_id ("TAB"))
211 else if (token == T_STRING && tokstr.length == 1)
213 delim = tokstr.string[0];
222 dls->delims = xrealloc (dls->delims, dls->delim_cnt + 1);
223 dls->delims[dls->delim_cnt++] = delim;
237 dls->case_size = dict_get_case_size (default_dict);
238 fh_set_default_handle (fh);
241 dls->type = DLS_FIXED;
245 if (dls->type == DLS_FREE)
251 if (dls->type == DLS_FIXED)
253 if (!parse_fixed (dls))
256 dump_fixed_table (dls->first, fh, dls->rec_cnt);
260 if (!parse_free (&dls->first, &dls->last))
263 dump_free_table (dls, fh);
266 dls->reader = dfm_open_reader (fh);
267 if (dls->reader == NULL)
270 if (vfm_source != NULL)
271 add_transformation (data_list_trns_proc, data_list_trns_free, dls);
273 vfm_source = create_case_source (&data_list_source_class, dls);
278 data_list_trns_free (dls);
279 return CMD_CASCADING_FAILURE;
282 /* Adds SPEC to the linked list with head at FIRST and tail at
285 append_var_spec (struct dls_var_spec **first, struct dls_var_spec **last,
286 struct dls_var_spec *spec)
293 (*last)->next = spec;
297 /* Fixed-format parsing. */
299 /* Used for chaining together fortran-like format specifiers. */
302 struct fmt_list *next;
305 struct fmt_list *down;
308 /* State of parsing DATA LIST. */
309 struct fixed_parsing_state
311 char **name; /* Variable names. */
312 size_t name_cnt; /* Number of names. */
314 int recno; /* Index of current record. */
315 int sc; /* 1-based column number of starting column for
316 next field to output. */
319 static int fixed_parse_compatible (struct fixed_parsing_state *,
320 struct dls_var_spec **,
321 struct dls_var_spec **);
322 static int fixed_parse_fortran (struct fixed_parsing_state *,
323 struct dls_var_spec **,
324 struct dls_var_spec **);
326 /* Parses all the variable specifications for DATA LIST FIXED,
327 storing them into DLS. Returns nonzero if successful. */
329 parse_fixed (struct data_list_pgm *dls)
331 struct fixed_parsing_state fx;
339 while (lex_match ('/'))
342 if (lex_is_integer ())
344 if (lex_integer () < fx.recno)
346 msg (SE, _("The record number specified, %ld, is "
347 "before the previous record, %d. Data "
348 "fields must be listed in order of "
349 "increasing record number."),
350 lex_integer (), fx.recno - 1);
354 fx.recno = lex_integer ();
360 if (!parse_DATA_LIST_vars (&fx.name, &fx.name_cnt, PV_NONE))
363 if (lex_is_number ())
365 if (!fixed_parse_compatible (&fx, &dls->first, &dls->last))
368 else if (token == '(')
370 if (!fixed_parse_fortran (&fx, &dls->first, &dls->last))
375 msg (SE, _("SPSS-like or FORTRAN-like format "
376 "specification expected after variable names."));
380 for (i = 0; i < fx.name_cnt; i++)
384 if (dls->first == NULL)
386 msg (SE, _("At least one variable must be specified."));
389 if (dls->rec_cnt && dls->last->rec > dls->rec_cnt)
391 msg (SE, _("Variables are specified on records that "
392 "should not exist according to RECORDS subcommand."));
395 else if (!dls->rec_cnt)
396 dls->rec_cnt = dls->last->rec;
397 return lex_end_of_command () == CMD_SUCCESS;
400 for (i = 0; i < fx.name_cnt; i++)
406 /* Parses a variable specification in the form 1-10 (A) based on
407 FX and adds specifications to the linked list with head at
408 FIRST and tail at LAST. */
410 fixed_parse_compatible (struct fixed_parsing_state *fx,
411 struct dls_var_spec **first, struct dls_var_spec **last)
413 struct fmt_spec input;
419 if (!lex_force_int ())
424 msg (SE, _("Column positions for fields must be positive."));
430 lex_negative_to_dash ();
433 if (!lex_force_int ())
438 msg (SE, _("Column positions for fields must be positive."));
443 msg (SE, _("The ending column for a field must be "
444 "greater than the starting column."));
453 /* Divide columns evenly. */
454 input.w = (lc - fc + 1) / fx->name_cnt;
455 if ((lc - fc + 1) % fx->name_cnt)
457 msg (SE, _("The %d columns %d-%d "
458 "can't be evenly divided into %d fields."),
459 lc - fc + 1, fc, lc, fx->name_cnt);
463 /* Format specifier. */
466 struct fmt_desc *fdp;
472 input.type = parse_format_specifier_name (&cp, 0);
473 if (input.type == -1)
477 msg (SE, _("A format specifier on this line "
478 "has extra characters on the end."));
488 if (lex_is_integer ())
490 if (lex_integer () < 1)
492 msg (SE, _("The value for number of decimal places "
493 "must be at least 1."));
497 input.d = lex_integer ();
503 fdp = &formats[input.type];
504 if (fdp->n_args < 2 && input.d)
506 msg (SE, _("Input format %s doesn't accept decimal places."),
514 if (!lex_force_match (')'))
522 if (!check_input_specifier (&input, 1))
525 /* Start column for next specification. */
528 /* Width of variables to create. */
529 if (input.type == FMT_A || input.type == FMT_AHEX)
534 /* Create variables and var specs. */
535 for (i = 0; i < fx->name_cnt; i++)
537 struct dls_var_spec *spec;
540 v = dict_create_var (default_dict, fx->name[i], width);
543 convert_fmt_ItoO (&input, &v->print);
545 if (!case_source_is_complex (vfm_source))
550 v = dict_lookup_var_assert (default_dict, fx->name[i]);
551 if (vfm_source == NULL)
553 msg (SE, _("%s is a duplicate variable name."), fx->name[i]);
556 if ((width != 0) != (v->width != 0))
558 msg (SE, _("There is already a variable %s of a "
563 if (width != 0 && width != v->width)
565 msg (SE, _("There is already a string variable %s of a "
566 "different width."), fx->name[i]);
571 spec = xmalloc (sizeof *spec);
575 spec->rec = fx->recno;
576 spec->fc = fc + input.w * i;
577 spec->lc = spec->fc + input.w - 1;
578 append_var_spec (first, last, spec);
583 /* Destroy format list F and, if RECURSE is nonzero, all its
586 destroy_fmt_list (struct fmt_list *f, int recurse)
588 struct fmt_list *next;
593 if (recurse && f->f.type == FMT_DESCEND)
594 destroy_fmt_list (f->down, 1);
599 /* Takes a hierarchically structured fmt_list F as constructed by
600 fixed_parse_fortran(), and flattens it, adding the variable
601 specifications to the linked list with head FIRST and tail
602 LAST. NAME_IDX is used to take values from the list of names
603 in FX; it should initially point to a value of 0. */
605 dump_fmt_list (struct fixed_parsing_state *fx, struct fmt_list *f,
606 struct dls_var_spec **first, struct dls_var_spec **last,
611 for (; f; f = f->next)
612 if (f->f.type == FMT_X)
614 else if (f->f.type == FMT_T)
616 else if (f->f.type == FMT_NEWREC)
618 fx->recno += f->count;
622 for (i = 0; i < f->count; i++)
623 if (f->f.type == FMT_DESCEND)
625 if (!dump_fmt_list (fx, f->down, first, last, name_idx))
630 struct dls_var_spec *spec;
634 if (formats[f->f.type].cat & FCAT_STRING)
638 if (*name_idx >= fx->name_cnt)
640 msg (SE, _("The number of format "
641 "specifications exceeds the given number of "
646 v = dict_create_var (default_dict, fx->name[(*name_idx)++], width);
649 msg (SE, _("%s is a duplicate variable name."), fx->name[i]);
653 if (!case_source_is_complex (vfm_source))
656 spec = xmalloc (sizeof *spec);
660 spec->rec = fx->recno;
662 spec->lc = fx->sc + f->f.w - 1;
663 append_var_spec (first, last, spec);
665 convert_fmt_ItoO (&spec->input, &v->print);
673 /* Recursively parses a FORTRAN-like format specification into
674 the linked list with head FIRST and tail TAIL. LEVEL is the
675 level of recursion, starting from 0. Returns the parsed
676 specification if successful, or a null pointer on failure. */
677 static struct fmt_list *
678 fixed_parse_fortran_internal (struct fixed_parsing_state *fx,
679 struct dls_var_spec **first,
680 struct dls_var_spec **last)
682 struct fmt_list *head = NULL;
683 struct fmt_list *tail = NULL;
685 lex_force_match ('(');
689 struct fmt_list *new = xmalloc (sizeof *new);
692 /* Append new to list. */
700 if (lex_is_integer ())
702 new->count = lex_integer ();
708 /* Parse format specifier. */
711 new->f.type = FMT_DESCEND;
712 new->down = fixed_parse_fortran_internal (fx, first, last);
713 if (new->down == NULL)
716 else if (lex_match ('/'))
717 new->f.type = FMT_NEWREC;
718 else if (!parse_format_specifier (&new->f, FMTP_ALLOW_XT)
719 || !check_input_specifier (&new->f, 1))
724 lex_force_match (')');
729 destroy_fmt_list (head, 0);
734 /* Parses a FORTRAN-like format specification into the linked
735 list with head FIRST and tail LAST. Returns nonzero if
738 fixed_parse_fortran (struct fixed_parsing_state *fx,
739 struct dls_var_spec **first, struct dls_var_spec **last)
741 struct fmt_list *list;
744 list = fixed_parse_fortran_internal (fx, first, last);
749 dump_fmt_list (fx, list, first, last, &name_idx);
750 destroy_fmt_list (list, 1);
751 if (name_idx < fx->name_cnt)
753 msg (SE, _("There aren't enough format specifications "
754 "to match the number of variable names given."));
761 /* Displays a table giving information on fixed-format variable
762 parsing on DATA LIST. */
763 /* FIXME: The `Columns' column should be divided into three columns,
764 one for the starting column, one for the dash, one for the ending
765 column; then right-justify the starting column and left-justify the
768 dump_fixed_table (const struct dls_var_spec *specs,
769 const struct file_handle *fh, int rec_cnt)
771 const struct dls_var_spec *spec;
775 for (i = 0, spec = specs; spec; spec = spec->next)
777 t = tab_create (4, i + 1, 0);
778 tab_columns (t, TAB_COL_DOWN, 1);
779 tab_headers (t, 0, 0, 1, 0);
780 tab_text (t, 0, 0, TAB_CENTER | TAT_TITLE, _("Variable"));
781 tab_text (t, 1, 0, TAB_CENTER | TAT_TITLE, _("Record"));
782 tab_text (t, 2, 0, TAB_CENTER | TAT_TITLE, _("Columns"));
783 tab_text (t, 3, 0, TAB_CENTER | TAT_TITLE, _("Format"));
784 tab_box (t, TAL_1, TAL_1, TAL_0, TAL_1, 0, 0, 3, i);
785 tab_hline (t, TAL_2, 0, 3, 1);
786 tab_dim (t, tab_natural_dimensions);
788 for (i = 1, spec = specs; spec; spec = spec->next, i++)
790 tab_text (t, 0, i, TAB_LEFT, spec->v->name);
791 tab_text (t, 1, i, TAT_PRINTF, "%d", spec->rec);
792 tab_text (t, 2, i, TAT_PRINTF, "%3d-%3d",
794 tab_text (t, 3, i, TAB_LEFT | TAB_FIX,
795 fmt_to_string (&spec->input));
798 tab_title (t, ngettext ("Reading %d record from %s.",
799 "Reading %d records from %s.", rec_cnt),
800 rec_cnt, fh_get_name (fh));
804 /* Free-format parsing. */
806 /* Parses variable specifications for DATA LIST FREE and adds
807 them to the linked list with head FIRST and tail LAST.
808 Returns nonzero only if successful. */
810 parse_free (struct dls_var_spec **first, struct dls_var_spec **last)
815 struct fmt_spec input, output;
821 if (!parse_DATA_LIST_vars (&name, &name_cnt, PV_NONE))
826 if (!parse_format_specifier (&input, 0)
827 || !check_input_specifier (&input, 1)
828 || !lex_force_match (')'))
830 for (i = 0; i < name_cnt; i++)
835 convert_fmt_ItoO (&input, &output);
840 input = make_input_format (FMT_F, 8, 0);
841 output = *get_format ();
844 if (input.type == FMT_A || input.type == FMT_AHEX)
848 for (i = 0; i < name_cnt; i++)
850 struct dls_var_spec *spec;
853 v = dict_create_var (default_dict, name[i], width);
857 msg (SE, _("%s is a duplicate variable name."), name[i]);
860 v->print = v->write = output;
862 if (!case_source_is_complex (vfm_source))
865 spec = xmalloc (sizeof *spec);
869 str_copy_trunc (spec->name, sizeof spec->name, v->name);
870 append_var_spec (first, last, spec);
872 for (i = 0; i < name_cnt; i++)
877 return lex_end_of_command () == CMD_SUCCESS;
880 /* Displays a table giving information on free-format variable parsing
883 dump_free_table (const struct data_list_pgm *dls,
884 const struct file_handle *fh)
890 struct dls_var_spec *spec;
891 for (i = 0, spec = dls->first; spec; spec = spec->next)
895 t = tab_create (2, i + 1, 0);
896 tab_columns (t, TAB_COL_DOWN, 1);
897 tab_headers (t, 0, 0, 1, 0);
898 tab_text (t, 0, 0, TAB_CENTER | TAT_TITLE, _("Variable"));
899 tab_text (t, 1, 0, TAB_CENTER | TAT_TITLE, _("Format"));
900 tab_box (t, TAL_1, TAL_1, TAL_0, TAL_1, 0, 0, 1, i);
901 tab_hline (t, TAL_2, 0, 1, 1);
902 tab_dim (t, tab_natural_dimensions);
905 struct dls_var_spec *spec;
907 for (i = 1, spec = dls->first; spec; spec = spec->next, i++)
909 tab_text (t, 0, i, TAB_LEFT, spec->v->name);
910 tab_text (t, 1, i, TAB_LEFT | TAB_FIX, fmt_to_string (&spec->input));
914 tab_title (t, _("Reading free-form data from %s."), fh_get_name (fh));
919 /* Input procedure. */
921 /* Extracts a field from the current position in the current
922 record. Fields can be unquoted or quoted with single- or
923 double-quote characters. *FIELD is set to the field content.
924 After parsing the field, sets the current position in the
925 record to just past the field and any trailing delimiter.
926 END_BLANK is used internally; it should be initialized by the
927 caller to 0 and left alone afterward. Returns 0 on failure or
928 a 1-based column number indicating the beginning of the field
931 cut_field (const struct data_list_pgm *dls, struct fixed_string *field,
934 struct fixed_string line;
938 if (dfm_eof (dls->reader))
940 if (dls->delim_cnt == 0)
941 dfm_expand_tabs (dls->reader);
942 dfm_get_record (dls->reader, &line);
944 cp = ls_c_str (&line);
945 if (dls->delim_cnt == 0)
947 /* Skip leading whitespace. */
948 while (cp < ls_end (&line) && isspace ((unsigned char) *cp))
950 if (cp >= ls_end (&line))
953 /* Handle actual data, whether quoted or unquoted. */
954 if (*cp == '\'' || *cp == '"')
958 field->string = ++cp;
959 while (cp < ls_end (&line) && *cp != quote)
961 field->length = cp - field->string;
962 if (cp < ls_end (&line))
965 msg (SW, _("Quoted string missing terminating `%c'."), quote);
970 while (cp < ls_end (&line)
971 && !isspace ((unsigned char) *cp) && *cp != ',')
973 field->length = cp - field->string;
976 /* Skip trailing whitespace and a single comma if present. */
977 while (cp < ls_end (&line) && isspace ((unsigned char) *cp))
979 if (cp < ls_end (&line) && *cp == ',')
984 if (cp >= ls_end (&line))
986 int column = dfm_column_start (dls->reader);
987 /* A blank line or a line that ends in \t has a
988 trailing blank field. */
989 if (column == 1 || (column > 1 && cp[-1] == '\t'))
994 field->string = ls_end (&line);
996 dfm_forward_record (dls->reader);
1011 while (cp < ls_end (&line)
1012 && memchr (dls->delims, *cp, dls->delim_cnt) == NULL)
1014 field->length = cp - field->string;
1015 if (cp < ls_end (&line))
1020 dfm_forward_columns (dls->reader, field->string - line.string);
1021 column_start = dfm_column_start (dls->reader);
1023 dfm_forward_columns (dls->reader, cp - field->string);
1025 return column_start;
1028 static bool read_from_data_list_fixed (const struct data_list_pgm *,
1030 static bool read_from_data_list_free (const struct data_list_pgm *,
1032 static bool read_from_data_list_list (const struct data_list_pgm *,
1035 /* Reads a case from DLS into C.
1036 Returns true if successful, false at end of file or on I/O error. */
1038 read_from_data_list (const struct data_list_pgm *dls, struct ccase *c)
1042 dfm_push (dls->reader);
1046 retval = read_from_data_list_fixed (dls, c);
1049 retval = read_from_data_list_free (dls, c);
1052 retval = read_from_data_list_list (dls, c);
1057 dfm_pop (dls->reader);
1062 /* Reads a case from the data file into C, parsing it according
1063 to fixed-format syntax rules in DLS.
1064 Returns true if successful, false at end of file or on I/O error. */
1066 read_from_data_list_fixed (const struct data_list_pgm *dls, struct ccase *c)
1068 struct dls_var_spec *var_spec = dls->first;
1071 if (dfm_eof (dls->reader))
1073 for (i = 1; i <= dls->rec_cnt; i++)
1075 struct fixed_string line;
1077 if (dfm_eof (dls->reader))
1079 /* Note that this can't occur on the first record. */
1080 msg (SW, _("Partial case of %d of %d records discarded."),
1081 i - 1, dls->rec_cnt);
1084 dfm_expand_tabs (dls->reader);
1085 dfm_get_record (dls->reader, &line);
1087 for (; var_spec && i == var_spec->rec; var_spec = var_spec->next)
1091 data_in_finite_line (&di, ls_c_str (&line), ls_length (&line),
1092 var_spec->fc, var_spec->lc);
1093 di.v = case_data_rw (c, var_spec->fv);
1094 di.flags = DI_IMPLIED_DECIMALS;
1095 di.f1 = var_spec->fc;
1096 di.format = var_spec->input;
1101 dfm_forward_record (dls->reader);
1107 /* Reads a case from the data file into C, parsing it according
1108 to free-format syntax rules in DLS.
1109 Returns true if successful, false at end of file or on I/O error. */
1111 read_from_data_list_free (const struct data_list_pgm *dls, struct ccase *c)
1113 struct dls_var_spec *var_spec;
1116 for (var_spec = dls->first; var_spec; var_spec = var_spec->next)
1118 struct fixed_string field;
1121 /* Cut out a field and read in a new record if necessary. */
1124 column = cut_field (dls, &field, &end_blank);
1128 if (!dfm_eof (dls->reader))
1129 dfm_forward_record (dls->reader);
1130 if (dfm_eof (dls->reader))
1132 if (var_spec != dls->first)
1133 msg (SW, _("Partial case discarded. The first variable "
1134 "missing was %s."), var_spec->name);
1142 di.s = ls_c_str (&field);
1143 di.e = ls_end (&field);
1144 di.v = case_data_rw (c, var_spec->fv);
1147 di.format = var_spec->input;
1154 /* Reads a case from the data file and parses it according to
1155 list-format syntax rules.
1156 Returns true if successful, false at end of file or on I/O error. */
1158 read_from_data_list_list (const struct data_list_pgm *dls, struct ccase *c)
1160 struct dls_var_spec *var_spec;
1163 if (dfm_eof (dls->reader))
1166 for (var_spec = dls->first; var_spec; var_spec = var_spec->next)
1168 struct fixed_string field;
1171 /* Cut out a field and check for end-of-line. */
1172 column = cut_field (dls, &field, &end_blank);
1175 if (get_undefined ())
1176 msg (SW, _("Missing value(s) for all variables from %s onward. "
1177 "These will be filled with the system-missing value "
1178 "or blanks, as appropriate."),
1180 for (; var_spec; var_spec = var_spec->next)
1182 int width = get_format_var_width (&var_spec->input);
1184 case_data_rw (c, var_spec->fv)->f = SYSMIS;
1186 memset (case_data_rw (c, var_spec->fv)->s, ' ', width);
1194 di.s = ls_c_str (&field);
1195 di.e = ls_end (&field);
1196 di.v = case_data_rw (c, var_spec->fv);
1199 di.format = var_spec->input;
1204 dfm_forward_record (dls->reader);
1208 /* Destroys SPEC. */
1210 destroy_dls_var_spec (struct dls_var_spec *spec)
1212 struct dls_var_spec *next;
1214 while (spec != NULL)
1222 /* Destroys DATA LIST transformation DLS.
1223 Returns true if successful, false if an I/O error occurred. */
1225 data_list_trns_free (void *dls_)
1227 struct data_list_pgm *dls = dls_;
1229 destroy_dls_var_spec (dls->first);
1230 dfm_close_reader (dls->reader);
1235 /* Handle DATA LIST transformation DLS, parsing data into C. */
1237 data_list_trns_proc (void *dls_, struct ccase *c, int case_num UNUSED)
1239 struct data_list_pgm *dls = dls_;
1242 if (read_from_data_list (dls, c))
1243 retval = TRNS_CONTINUE;
1244 else if (dfm_reader_error (dls->reader) || dfm_eof (dls->reader) > 1)
1246 /* An I/O error, or encountering end of file for a second
1247 time, should be escalated into a more serious error. */
1248 retval = TRNS_ERROR;
1251 retval = TRNS_DROP_CASE;
1253 /* If there was an END subcommand handle it. */
1254 if (dls->end != NULL)
1256 double *end = &case_data_rw (c, dls->end->fv)->f;
1257 if (retval == TRNS_DROP_CASE)
1260 retval = TRNS_CONTINUE;
1269 /* Reads all the records from the data file and passes them to
1271 Returns true if successful, false if an I/O error occurred. */
1273 data_list_source_read (struct case_source *source,
1275 write_case_func *write_case, write_case_data wc_data)
1277 struct data_list_pgm *dls = source->aux;
1283 if (!read_from_data_list (dls, c))
1284 return !dfm_reader_error (dls->reader);
1286 dfm_push (dls->reader);
1287 ok = write_case (wc_data);
1288 dfm_pop (dls->reader);
1294 /* Destroys the source's internal data. */
1296 data_list_source_destroy (struct case_source *source)
1298 data_list_trns_free (source->aux);
1301 static const struct case_source_class data_list_source_class =
1305 data_list_source_read,
1306 data_list_source_destroy,
1309 /* REPEATING DATA. */
1311 /* Represents a number or a variable. */
1312 struct rpd_num_or_var
1314 int num; /* Value, or 0. */
1315 struct variable *var; /* Variable, if number==0. */
1318 /* REPEATING DATA private data structure. */
1319 struct repeating_data_trns
1321 struct dls_var_spec *first, *last; /* Variable parsing specifications. */
1322 struct dfm_reader *reader; /* Input file, never NULL. */
1324 struct rpd_num_or_var starts_beg; /* STARTS=, before the dash. */
1325 struct rpd_num_or_var starts_end; /* STARTS=, after the dash. */
1326 struct rpd_num_or_var occurs; /* OCCURS= subcommand. */
1327 struct rpd_num_or_var length; /* LENGTH= subcommand. */
1328 struct rpd_num_or_var cont_beg; /* CONTINUED=, before the dash. */
1329 struct rpd_num_or_var cont_end; /* CONTINUED=, after the dash. */
1331 /* ID subcommand. */
1332 int id_beg, id_end; /* Beginning & end columns. */
1333 struct variable *id_var; /* DATA LIST variable. */
1334 struct fmt_spec id_spec; /* Input format spec. */
1335 union value *id_value; /* ID value. */
1337 write_case_func *write_case;
1338 write_case_data wc_data;
1341 static trns_free_func repeating_data_trns_free;
1342 static int parse_num_or_var (struct rpd_num_or_var *, const char *);
1343 static int parse_repeating_data (struct dls_var_spec **,
1344 struct dls_var_spec **);
1345 static void find_variable_input_spec (struct variable *v,
1346 struct fmt_spec *spec);
1348 int cmd_repeating_data (void);
1350 /* Parses the REPEATING DATA command. */
1352 cmd_repeating_data (void)
1354 struct repeating_data_trns *rpd;
1355 int table = 1; /* Print table? */
1356 bool saw_starts = false; /* Saw STARTS subcommand? */
1357 bool saw_occurs = false; /* Saw OCCURS subcommand? */
1358 bool saw_length = false; /* Saw LENGTH subcommand? */
1359 bool saw_continued = false; /* Saw CONTINUED subcommand? */
1360 bool saw_id = false; /* Saw ID subcommand? */
1361 struct file_handle *const fh = fh_get_default_handle ();
1363 assert (case_source_is_complex (vfm_source));
1365 rpd = xmalloc (sizeof *rpd);
1366 rpd->reader = dfm_open_reader (fh);
1367 rpd->first = rpd->last = NULL;
1368 rpd->starts_beg.num = 0;
1369 rpd->starts_beg.var = NULL;
1370 rpd->starts_end = rpd->occurs = rpd->length = rpd->cont_beg
1371 = rpd->cont_end = rpd->starts_beg;
1372 rpd->id_beg = rpd->id_end = 0;
1374 rpd->id_value = NULL;
1380 if (lex_match_id ("FILE"))
1382 struct file_handle *file;
1384 file = fh_parse (FH_REF_FILE | FH_REF_INLINE);
1389 msg (SE, _("REPEATING DATA must use the same file as its "
1390 "corresponding DATA LIST or FILE TYPE."));
1394 else if (lex_match_id ("STARTS"))
1399 msg (SE, _("%s subcommand given multiple times."),"STARTS");
1404 if (!parse_num_or_var (&rpd->starts_beg, "STARTS beginning column"))
1407 lex_negative_to_dash ();
1408 if (lex_match ('-'))
1410 if (!parse_num_or_var (&rpd->starts_end, "STARTS ending column"))
1413 /* Otherwise, rpd->starts_end is uninitialized. We
1414 will initialize it later from the record length
1415 of the file. We can't do so now because the
1416 file handle may not be specified yet. */
1419 if (rpd->starts_beg.num != 0 && rpd->starts_end.num != 0
1420 && rpd->starts_beg.num > rpd->starts_end.num)
1422 msg (SE, _("STARTS beginning column (%d) exceeds "
1423 "STARTS ending column (%d)."),
1424 rpd->starts_beg.num, rpd->starts_end.num);
1428 else if (lex_match_id ("OCCURS"))
1433 msg (SE, _("%s subcommand given multiple times."),"OCCURS");
1438 if (!parse_num_or_var (&rpd->occurs, "OCCURS"))
1441 else if (lex_match_id ("LENGTH"))
1446 msg (SE, _("%s subcommand given multiple times."),"LENGTH");
1451 if (!parse_num_or_var (&rpd->length, "LENGTH"))
1454 else if (lex_match_id ("CONTINUED"))
1459 msg (SE, _("%s subcommand given multiple times."),"CONTINUED");
1462 saw_continued = true;
1464 if (!lex_match ('/'))
1466 if (!parse_num_or_var (&rpd->cont_beg,
1467 "CONTINUED beginning column"))
1470 lex_negative_to_dash ();
1472 && !parse_num_or_var (&rpd->cont_end,
1473 "CONTINUED ending column"))
1476 if (rpd->cont_beg.num != 0 && rpd->cont_end.num != 0
1477 && rpd->cont_beg.num > rpd->cont_end.num)
1479 msg (SE, _("CONTINUED beginning column (%d) exceeds "
1480 "CONTINUED ending column (%d)."),
1481 rpd->cont_beg.num, rpd->cont_end.num);
1486 rpd->cont_beg.num = 1;
1488 else if (lex_match_id ("ID"))
1493 msg (SE, _("%s subcommand given multiple times."),"ID");
1498 if (!lex_force_int ())
1500 if (lex_integer () < 1)
1502 msg (SE, _("ID beginning column (%ld) must be positive."),
1506 rpd->id_beg = lex_integer ();
1509 lex_negative_to_dash ();
1511 if (lex_match ('-'))
1513 if (!lex_force_int ())
1515 if (lex_integer () < 1)
1517 msg (SE, _("ID ending column (%ld) must be positive."),
1521 if (lex_integer () < rpd->id_end)
1523 msg (SE, _("ID ending column (%ld) cannot be less than "
1524 "ID beginning column (%d)."),
1525 lex_integer (), rpd->id_beg);
1529 rpd->id_end = lex_integer ();
1532 else rpd->id_end = rpd->id_beg;
1534 if (!lex_force_match ('='))
1536 rpd->id_var = parse_variable ();
1537 if (rpd->id_var == NULL)
1540 find_variable_input_spec (rpd->id_var, &rpd->id_spec);
1541 rpd->id_value = xnmalloc (rpd->id_var->nv, sizeof *rpd->id_value);
1543 else if (lex_match_id ("TABLE"))
1545 else if (lex_match_id ("NOTABLE"))
1547 else if (lex_match_id ("DATA"))
1555 if (!lex_force_match ('/'))
1559 /* Comes here when DATA specification encountered. */
1560 if (!saw_starts || !saw_occurs)
1563 msg (SE, _("Missing required specification STARTS."));
1565 msg (SE, _("Missing required specification OCCURS."));
1569 /* Enforce ID restriction. */
1570 if (saw_id && !saw_continued)
1572 msg (SE, _("ID specified without CONTINUED."));
1576 /* Calculate and check starts_end, cont_end if necessary. */
1577 if (rpd->starts_end.num == 0 && rpd->starts_end.var == NULL)
1579 rpd->starts_end.num = fh_get_record_width (fh);
1580 if (rpd->starts_beg.num != 0
1581 && rpd->starts_beg.num > rpd->starts_end.num)
1583 msg (SE, _("STARTS beginning column (%d) exceeds "
1584 "default STARTS ending column taken from file's "
1585 "record width (%d)."),
1586 rpd->starts_beg.num, rpd->starts_end.num);
1590 if (rpd->cont_end.num == 0 && rpd->cont_end.var == NULL)
1592 rpd->cont_end.num = fh_get_record_width (fh);
1593 if (rpd->cont_beg.num != 0
1594 && rpd->cont_beg.num > rpd->cont_end.num)
1596 msg (SE, _("CONTINUED beginning column (%d) exceeds "
1597 "default CONTINUED ending column taken from file's "
1598 "record width (%d)."),
1599 rpd->cont_beg.num, rpd->cont_end.num);
1605 if (!parse_repeating_data (&rpd->first, &rpd->last))
1608 /* Calculate length if necessary. */
1611 struct dls_var_spec *iter;
1613 for (iter = rpd->first; iter; iter = iter->next)
1614 if (iter->lc > rpd->length.num)
1615 rpd->length.num = iter->lc;
1616 assert (rpd->length.num != 0);
1620 dump_fixed_table (rpd->first, fh, rpd->last->rec);
1622 add_transformation (repeating_data_trns_proc, repeating_data_trns_free, rpd);
1624 return lex_end_of_command ();
1627 repeating_data_trns_free (rpd);
1628 return CMD_CASCADING_FAILURE;
1631 /* Finds the input format specification for variable V and puts
1632 it in SPEC. Because of the way that DATA LIST is structured,
1633 this is nontrivial. */
1635 find_variable_input_spec (struct variable *v, struct fmt_spec *spec)
1639 for (i = 0; i < n_trns; i++)
1641 struct transformation *trns = &t_trns[i];
1643 if (trns->proc == data_list_trns_proc)
1645 struct data_list_pgm *pgm = trns->private;
1646 struct dls_var_spec *iter;
1648 for (iter = pgm->first; iter; iter = iter->next)
1651 *spec = iter->input;
1660 /* Parses a number or a variable name from the syntax file and puts
1661 the results in VALUE. Ensures that the number is at least 1; else
1662 emits an error based on MESSAGE. Returns nonzero only if
1665 parse_num_or_var (struct rpd_num_or_var *value, const char *message)
1670 value->var = parse_variable ();
1671 if (value->var == NULL)
1673 if (value->var->type == ALPHA)
1675 msg (SE, _("String variable not allowed here."));
1679 else if (lex_is_integer ())
1681 value->num = lex_integer ();
1685 msg (SE, _("%s (%d) must be at least 1."), message, value->num);
1691 msg (SE, _("Variable or integer expected for %s."), message);
1697 /* Parses data specifications for repeating data groups, adding
1698 them to the linked list with head FIRST and tail LAST.
1699 Returns nonzero only if successful. */
1701 parse_repeating_data (struct dls_var_spec **first, struct dls_var_spec **last)
1703 struct fixed_parsing_state fx;
1709 while (token != '.')
1711 if (!parse_DATA_LIST_vars (&fx.name, &fx.name_cnt, PV_NONE))
1714 if (lex_is_number ())
1716 if (!fixed_parse_compatible (&fx, first, last))
1719 else if (token == '(')
1721 if (!fixed_parse_fortran (&fx, first, last))
1726 msg (SE, _("SPSS-like or FORTRAN-like format "
1727 "specification expected after variable names."));
1731 for (i = 0; i < fx.name_cnt; i++)
1739 for (i = 0; i < fx.name_cnt; i++)
1745 /* Obtains the real value for rpd_num_or_var N in case C and returns
1746 it. The valid range is nonnegative numbers, but numbers outside
1747 this range can be returned and should be handled by the caller as
1750 realize_value (struct rpd_num_or_var *n, struct ccase *c)
1754 double v = case_num (c, n->var->fv);
1755 return v != SYSMIS && v >= INT_MIN && v <= INT_MAX ? v : -1;
1761 /* Parameter record passed to rpd_parse_record(). */
1762 struct rpd_parse_info
1764 struct repeating_data_trns *trns; /* REPEATING DATA transformation. */
1765 const char *line; /* Line being parsed. */
1766 size_t len; /* Line length. */
1767 int beg, end; /* First and last column of first occurrence. */
1768 int ofs; /* Column offset between repeated occurrences. */
1769 struct ccase *c; /* Case to fill in. */
1770 int verify_id; /* Zero to initialize ID, nonzero to verify it. */
1771 int max_occurs; /* Max number of occurrences to parse. */
1774 /* Parses one record of repeated data and outputs corresponding
1775 cases. Returns number of occurrences parsed up to the
1776 maximum specified in INFO. */
1778 rpd_parse_record (const struct rpd_parse_info *info)
1780 struct repeating_data_trns *t = info->trns;
1781 int cur = info->beg;
1784 /* Handle record ID values. */
1787 union value id_temp[MAX_ELEMS_PER_VALUE];
1789 /* Parse record ID into V. */
1793 data_in_finite_line (&di, info->line, info->len, t->id_beg, t->id_end);
1794 di.v = info->verify_id ? id_temp : t->id_value;
1797 di.format = t->id_spec;
1804 && compare_values (id_temp, t->id_value, t->id_var->width) != 0)
1806 char expected_str [MAX_FORMATTED_LEN + 1];
1807 char actual_str [MAX_FORMATTED_LEN + 1];
1809 data_out (expected_str, &t->id_var->print, t->id_value);
1810 expected_str[t->id_var->print.w] = '\0';
1812 data_out (actual_str, &t->id_var->print, id_temp);
1813 actual_str[t->id_var->print.w] = '\0';
1816 _("Encountered mismatched record ID \"%s\" "
1817 "expecting \"%s\"."),
1818 actual_str, expected_str);
1824 /* Iterate over the set of expected occurrences and record each of
1825 them as a separate case. FIXME: We need to execute any
1826 transformations that follow the current one. */
1830 for (occurrences = 0; occurrences < info->max_occurs; )
1832 if (cur + info->ofs > info->end + 1)
1837 struct dls_var_spec *var_spec = t->first;
1839 for (; var_spec; var_spec = var_spec->next)
1841 int fc = var_spec->fc - 1 + cur;
1842 int lc = var_spec->lc - 1 + cur;
1844 if (fc > info->len && !warned && var_spec->input.type != FMT_A)
1849 _("Variable %s starting in column %d extends "
1850 "beyond physical record length of %d."),
1851 var_spec->v->name, fc, info->len);
1857 data_in_finite_line (&di, info->line, info->len, fc, lc);
1858 di.v = case_data_rw (info->c, var_spec->fv);
1861 di.format = var_spec->input;
1871 if (!t->write_case (t->wc_data))
1879 /* Reads one set of repetitions of the elements in the REPEATING
1880 DATA structure. Returns TRNS_CONTINUE on success,
1881 TRNS_DROP_CASE on end of file or on failure, or TRNS_NEXT_CASE. */
1883 repeating_data_trns_proc (void *trns_, struct ccase *c, int case_num UNUSED)
1885 struct repeating_data_trns *t = trns_;
1887 struct fixed_string line; /* Current record. */
1889 int starts_beg; /* Starting column. */
1890 int starts_end; /* Ending column. */
1891 int occurs; /* Number of repetitions. */
1892 int length; /* Length of each occurrence. */
1893 int cont_beg; /* Starting column for continuation lines. */
1894 int cont_end; /* Ending column for continuation lines. */
1896 int occurs_left; /* Number of occurrences remaining. */
1898 int code; /* Return value from rpd_parse_record(). */
1900 int skip_first_record = 0;
1902 dfm_push (t->reader);
1904 /* Read the current record. */
1905 dfm_reread_record (t->reader, 1);
1906 dfm_expand_tabs (t->reader);
1907 if (dfm_eof (t->reader))
1908 return TRNS_DROP_CASE;
1909 dfm_get_record (t->reader, &line);
1910 dfm_forward_record (t->reader);
1912 /* Calculate occurs, length. */
1913 occurs_left = occurs = realize_value (&t->occurs, c);
1916 rpd_msg (SE, _("Invalid value %d for OCCURS."), occurs);
1917 return TRNS_NEXT_CASE;
1919 starts_beg = realize_value (&t->starts_beg, c);
1920 if (starts_beg <= 0)
1922 rpd_msg (SE, _("Beginning column for STARTS (%d) must be at least 1."),
1924 return TRNS_NEXT_CASE;
1926 starts_end = realize_value (&t->starts_end, c);
1927 if (starts_end < starts_beg)
1929 rpd_msg (SE, _("Ending column for STARTS (%d) is less than "
1930 "beginning column (%d)."),
1931 starts_end, starts_beg);
1932 skip_first_record = 1;
1934 length = realize_value (&t->length, c);
1937 rpd_msg (SE, _("Invalid value %d for LENGTH."), length);
1939 occurs = occurs_left = 1;
1941 cont_beg = realize_value (&t->cont_beg, c);
1944 rpd_msg (SE, _("Beginning column for CONTINUED (%d) must be "
1947 return TRNS_DROP_CASE;
1949 cont_end = realize_value (&t->cont_end, c);
1950 if (cont_end < cont_beg)
1952 rpd_msg (SE, _("Ending column for CONTINUED (%d) is less than "
1953 "beginning column (%d)."),
1954 cont_end, cont_beg);
1955 return TRNS_DROP_CASE;
1958 /* Parse the first record. */
1959 if (!skip_first_record)
1961 struct rpd_parse_info info;
1963 info.line = ls_c_str (&line);
1964 info.len = ls_length (&line);
1965 info.beg = starts_beg;
1966 info.end = starts_end;
1970 info.max_occurs = occurs_left;
1971 code = rpd_parse_record (&info);
1973 return TRNS_DROP_CASE;
1974 occurs_left -= code;
1976 else if (cont_beg == 0)
1977 return TRNS_NEXT_CASE;
1979 /* Make sure, if some occurrences are left, that we have
1980 continuation records. */
1981 if (occurs_left > 0 && cont_beg == 0)
1984 _("Number of repetitions specified on OCCURS (%d) "
1985 "exceed number of repetitions available in "
1986 "space on STARTS (%d), and CONTINUED not specified."),
1987 occurs, (starts_end - starts_beg + 1) / length);
1988 return TRNS_DROP_CASE;
1991 /* Go on to additional records. */
1992 while (occurs_left != 0)
1994 struct rpd_parse_info info;
1996 assert (occurs_left >= 0);
1998 /* Read in another record. */
1999 if (dfm_eof (t->reader))
2002 _("Unexpected end of file with %d repetitions "
2003 "remaining out of %d."),
2004 occurs_left, occurs);
2005 return TRNS_DROP_CASE;
2007 dfm_expand_tabs (t->reader);
2008 dfm_get_record (t->reader, &line);
2009 dfm_forward_record (t->reader);
2011 /* Parse this record. */
2013 info.line = ls_c_str (&line);
2014 info.len = ls_length (&line);
2015 info.beg = cont_beg;
2016 info.end = cont_end;
2020 info.max_occurs = occurs_left;
2021 code = rpd_parse_record (&info);;
2023 return TRNS_DROP_CASE;
2024 occurs_left -= code;
2027 dfm_pop (t->reader);
2029 /* FIXME: This is a kluge until we've implemented multiplexing of
2031 return TRNS_NEXT_CASE;
2034 /* Frees a REPEATING DATA transformation.
2035 Returns true if successful, false if an I/O error occurred. */
2037 repeating_data_trns_free (void *rpd_)
2039 struct repeating_data_trns *rpd = rpd_;
2041 destroy_dls_var_spec (rpd->first);
2042 dfm_close_reader (rpd->reader);
2043 free (rpd->id_value);
2048 /* Lets repeating_data_trns_proc() know how to write the cases
2049 that it composes. Not elegant. */
2051 repeating_data_set_write_case (struct transformation *trns_,
2052 write_case_func *write_case,
2053 write_case_data wc_data)
2055 struct repeating_data_trns *t = trns_->private;
2057 assert (trns_->proc == repeating_data_trns_proc);
2058 t->write_case = write_case;
2059 t->wc_data = wc_data;
2062 /* Reports a message in CLASS with the given FORMAT as text,
2063 prefixing the message with "REPEATING DATA: " to make the
2066 rpd_msg (enum msg_class class, const char *format, ...)
2072 ds_create (&text, "REPEATING DATA: ");
2073 va_start (args, format);
2074 ds_vprintf (&text, format, args);
2077 e.category = msg_class_to_category (class);
2078 e.severity = msg_class_to_severity (class);
2079 e.where.file_name = NULL;
2080 e.where.line_number = 0;
2081 e.text = ds_c_str (&text);