1 /* PSPP - computes sample statistics.
2 Copyright (C) 1997-9, 2000, 2006 Free Software Foundation, Inc.
3 Written by Ben Pfaff <blp@gnu.org>.
5 This program is free software; you can redistribute it and/or
6 modify it under the terms of the GNU General Public License as
7 published by the Free Software Foundation; either version 2 of the
8 License, or (at your option) any later version.
10 This program is distributed in the hope that it will be useful, but
11 WITHOUT ANY WARRANTY; without even the implied warranty of
12 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
13 General Public License for more details.
15 You should have received a copy of the GNU General Public License
16 along with this program; if not, write to the Free Software
17 Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
22 #include "data-list.h"
29 #include <data/case.h>
30 #include <data/data-in.h>
31 #include <data/dictionary.h>
32 #include <data/format.h>
33 #include <data/settings.h>
34 #include <data/variable.h>
35 #include <language/command.h>
36 #include <language/data-io/data-list.h>
37 #include <language/data-io/data-reader.h>
38 #include <language/data-io/file-handle.h>
39 #include <language/data-io/file-type.h>
40 #include <language/data-io/inpt-pgm.h>
41 #include <language/lexer/lexer.h>
42 #include <libpspp/alloc.h>
43 #include <libpspp/compiler.h>
44 #include <libpspp/message.h>
45 #include <libpspp/message.h>
46 #include <libpspp/misc.h>
47 #include <libpspp/str.h>
48 #include <output/table.h>
49 #include <procedure.h>
52 #define _(msgid) gettext (msgid)
54 /* Utility function. */
56 /* FIXME: Either REPEATING DATA must be the last transformation, or we
57 must multiplex the transformations that follow (i.e., perform them
58 for every case that we produce from a repetition instance).
59 Currently we do neither. We should do one or the other. */
61 /* Describes how to parse one variable. */
64 struct dls_var_spec *next; /* Next specification in list. */
66 /* Both free and fixed formats. */
67 struct fmt_spec input; /* Input format of this field. */
68 struct variable *v; /* Associated variable. Used only in
69 parsing. Not safe later. */
70 int fv; /* First value in case. */
72 /* Fixed format only. */
73 int rec; /* Record number (1-based). */
74 int fc, lc; /* Column numbers in record. */
76 /* Free format only. */
77 char name[LONG_NAME_LEN + 1]; /* Name of variable. */
80 /* Constants for DATA LIST type. */
81 /* Must match table in cmd_data_list(). */
89 /* DATA LIST private data structure. */
92 struct dls_var_spec *first, *last; /* Variable parsing specifications. */
93 struct dfm_reader *reader; /* Data file reader. */
95 int type; /* A DLS_* constant. */
96 struct variable *end; /* Variable specified on END subcommand. */
97 int rec_cnt; /* Number of records. */
98 size_t case_size; /* Case size in bytes. */
99 char *delims; /* Delimiters if any; not null-terminated. */
100 size_t delim_cnt; /* Number of delimiter, or 0 for spaces. */
103 static const struct case_source_class data_list_source_class;
105 static void rpd_msg (enum msg_class, const char *format, ...);
106 static int parse_fixed (struct data_list_pgm *);
107 static int parse_free (struct dls_var_spec **, struct dls_var_spec **);
108 static void dump_fixed_table (const struct dls_var_spec *,
109 const struct file_handle *, int rec_cnt);
110 static void dump_free_table (const struct data_list_pgm *,
111 const struct file_handle *);
112 static void destroy_dls_var_spec (struct dls_var_spec *);
113 static trns_free_func data_list_trns_free;
114 static trns_proc_func data_list_trns_proc;
119 struct data_list_pgm *dls;
120 int table = -1; /* Print table if nonzero, -1=undecided. */
121 struct file_handle *fh = fh_inline_file ();
123 if (!in_input_program () && !in_file_type ())
124 discard_variables ();
126 dls = xmalloc (sizeof *dls);
133 dls->first = dls->last = NULL;
137 if (lex_match_id ("FILE"))
140 fh = fh_parse (FH_REF_FILE | FH_REF_INLINE);
143 if (in_file_type () && fh != fh_get_default_handle ())
145 msg (SE, _("DATA LIST must use the same file "
146 "as the enclosing FILE TYPE."));
150 else if (lex_match_id ("RECORDS"))
154 if (!lex_force_int ())
156 dls->rec_cnt = lex_integer ();
160 else if (lex_match_id ("END"))
164 msg (SE, _("The END subcommand may only be specified once."));
169 if (!lex_force_id ())
171 dls->end = dict_lookup_var (default_dict, tokid);
173 dls->end = dict_create_var_assert (default_dict, tokid, 0);
176 else if (token == T_ID)
178 if (lex_match_id ("NOTABLE"))
180 else if (lex_match_id ("TABLE"))
185 if (lex_match_id ("FIXED"))
187 else if (lex_match_id ("FREE"))
189 else if (lex_match_id ("LIST"))
199 msg (SE, _("Only one of FIXED, FREE, or LIST may "
205 if ((dls->type == DLS_FREE || dls->type == DLS_LIST)
208 while (!lex_match (')'))
212 if (lex_match_id ("TAB"))
214 else if (token == T_STRING && tokstr.length == 1)
216 delim = tokstr.string[0];
225 dls->delims = xrealloc (dls->delims, dls->delim_cnt + 1);
226 dls->delims[dls->delim_cnt++] = delim;
240 dls->case_size = dict_get_case_size (default_dict);
241 fh_set_default_handle (fh);
244 dls->type = DLS_FIXED;
248 if (dls->type == DLS_FREE)
254 if (dls->type == DLS_FIXED)
256 if (!parse_fixed (dls))
259 dump_fixed_table (dls->first, fh, dls->rec_cnt);
263 if (!parse_free (&dls->first, &dls->last))
266 dump_free_table (dls, fh);
269 dls->reader = dfm_open_reader (fh);
270 if (dls->reader == NULL)
273 if (vfm_source != NULL)
274 add_transformation (data_list_trns_proc, data_list_trns_free, dls);
276 vfm_source = create_case_source (&data_list_source_class, dls);
281 data_list_trns_free (dls);
282 return CMD_CASCADING_FAILURE;
285 /* Adds SPEC to the linked list with head at FIRST and tail at
288 append_var_spec (struct dls_var_spec **first, struct dls_var_spec **last,
289 struct dls_var_spec *spec)
296 (*last)->next = spec;
300 /* Fixed-format parsing. */
302 /* Used for chaining together fortran-like format specifiers. */
305 struct fmt_list *next;
308 struct fmt_list *down;
311 /* State of parsing DATA LIST. */
312 struct fixed_parsing_state
314 char **name; /* Variable names. */
315 size_t name_cnt; /* Number of names. */
317 int recno; /* Index of current record. */
318 int sc; /* 1-based column number of starting column for
319 next field to output. */
322 static int fixed_parse_compatible (struct fixed_parsing_state *,
323 struct dls_var_spec **,
324 struct dls_var_spec **);
325 static int fixed_parse_fortran (struct fixed_parsing_state *,
326 struct dls_var_spec **,
327 struct dls_var_spec **);
329 /* Parses all the variable specifications for DATA LIST FIXED,
330 storing them into DLS. Returns nonzero if successful. */
332 parse_fixed (struct data_list_pgm *dls)
334 struct fixed_parsing_state fx;
342 while (lex_match ('/'))
345 if (lex_is_integer ())
347 if (lex_integer () < fx.recno)
349 msg (SE, _("The record number specified, %ld, is "
350 "before the previous record, %d. Data "
351 "fields must be listed in order of "
352 "increasing record number."),
353 lex_integer (), fx.recno - 1);
357 fx.recno = lex_integer ();
363 if (!parse_DATA_LIST_vars (&fx.name, &fx.name_cnt, PV_NONE))
366 if (lex_is_number ())
368 if (!fixed_parse_compatible (&fx, &dls->first, &dls->last))
371 else if (token == '(')
373 if (!fixed_parse_fortran (&fx, &dls->first, &dls->last))
378 msg (SE, _("SPSS-like or FORTRAN-like format "
379 "specification expected after variable names."));
383 for (i = 0; i < fx.name_cnt; i++)
387 if (dls->first == NULL)
389 msg (SE, _("At least one variable must be specified."));
392 if (dls->rec_cnt && dls->last->rec > dls->rec_cnt)
394 msg (SE, _("Variables are specified on records that "
395 "should not exist according to RECORDS subcommand."));
398 else if (!dls->rec_cnt)
399 dls->rec_cnt = dls->last->rec;
400 return lex_end_of_command () == CMD_SUCCESS;
403 for (i = 0; i < fx.name_cnt; i++)
409 /* Parses a variable specification in the form 1-10 (A) based on
410 FX and adds specifications to the linked list with head at
411 FIRST and tail at LAST. */
413 fixed_parse_compatible (struct fixed_parsing_state *fx,
414 struct dls_var_spec **first, struct dls_var_spec **last)
416 struct fmt_spec input;
422 if (!lex_force_int ())
427 msg (SE, _("Column positions for fields must be positive."));
433 lex_negative_to_dash ();
436 if (!lex_force_int ())
441 msg (SE, _("Column positions for fields must be positive."));
446 msg (SE, _("The ending column for a field must be "
447 "greater than the starting column."));
456 /* Divide columns evenly. */
457 input.w = (lc - fc + 1) / fx->name_cnt;
458 if ((lc - fc + 1) % fx->name_cnt)
460 msg (SE, _("The %d columns %d-%d "
461 "can't be evenly divided into %d fields."),
462 lc - fc + 1, fc, lc, fx->name_cnt);
466 /* Format specifier. */
469 struct fmt_desc *fdp;
475 input.type = parse_format_specifier_name (&cp, 0);
476 if (input.type == -1)
480 msg (SE, _("A format specifier on this line "
481 "has extra characters on the end."));
491 if (lex_is_integer ())
493 if (lex_integer () < 1)
495 msg (SE, _("The value for number of decimal places "
496 "must be at least 1."));
500 input.d = lex_integer ();
506 fdp = &formats[input.type];
507 if (fdp->n_args < 2 && input.d)
509 msg (SE, _("Input format %s doesn't accept decimal places."),
517 if (!lex_force_match (')'))
525 if (!check_input_specifier (&input, 1))
528 /* Start column for next specification. */
531 /* Width of variables to create. */
532 if (input.type == FMT_A || input.type == FMT_AHEX)
537 /* Create variables and var specs. */
538 for (i = 0; i < fx->name_cnt; i++)
540 struct dls_var_spec *spec;
543 v = dict_create_var (default_dict, fx->name[i], width);
546 convert_fmt_ItoO (&input, &v->print);
548 if (!in_input_program () && !in_file_type ())
553 v = dict_lookup_var_assert (default_dict, fx->name[i]);
554 if (vfm_source == NULL)
556 msg (SE, _("%s is a duplicate variable name."), fx->name[i]);
559 if ((width != 0) != (v->width != 0))
561 msg (SE, _("There is already a variable %s of a "
566 if (width != 0 && width != v->width)
568 msg (SE, _("There is already a string variable %s of a "
569 "different width."), fx->name[i]);
574 spec = xmalloc (sizeof *spec);
578 spec->rec = fx->recno;
579 spec->fc = fc + input.w * i;
580 spec->lc = spec->fc + input.w - 1;
581 append_var_spec (first, last, spec);
586 /* Destroy format list F and, if RECURSE is nonzero, all its
589 destroy_fmt_list (struct fmt_list *f, int recurse)
591 struct fmt_list *next;
596 if (recurse && f->f.type == FMT_DESCEND)
597 destroy_fmt_list (f->down, 1);
602 /* Takes a hierarchically structured fmt_list F as constructed by
603 fixed_parse_fortran(), and flattens it, adding the variable
604 specifications to the linked list with head FIRST and tail
605 LAST. NAME_IDX is used to take values from the list of names
606 in FX; it should initially point to a value of 0. */
608 dump_fmt_list (struct fixed_parsing_state *fx, struct fmt_list *f,
609 struct dls_var_spec **first, struct dls_var_spec **last,
614 for (; f; f = f->next)
615 if (f->f.type == FMT_X)
617 else if (f->f.type == FMT_T)
619 else if (f->f.type == FMT_NEWREC)
621 fx->recno += f->count;
625 for (i = 0; i < f->count; i++)
626 if (f->f.type == FMT_DESCEND)
628 if (!dump_fmt_list (fx, f->down, first, last, name_idx))
633 struct dls_var_spec *spec;
637 if (formats[f->f.type].cat & FCAT_STRING)
641 if (*name_idx >= fx->name_cnt)
643 msg (SE, _("The number of format "
644 "specifications exceeds the given number of "
649 v = dict_create_var (default_dict, fx->name[(*name_idx)++], width);
652 msg (SE, _("%s is a duplicate variable name."), fx->name[i]);
656 if (!in_input_program () && !in_file_type ())
659 spec = xmalloc (sizeof *spec);
663 spec->rec = fx->recno;
665 spec->lc = fx->sc + f->f.w - 1;
666 append_var_spec (first, last, spec);
668 convert_fmt_ItoO (&spec->input, &v->print);
676 /* Recursively parses a FORTRAN-like format specification into
677 the linked list with head FIRST and tail TAIL. LEVEL is the
678 level of recursion, starting from 0. Returns the parsed
679 specification if successful, or a null pointer on failure. */
680 static struct fmt_list *
681 fixed_parse_fortran_internal (struct fixed_parsing_state *fx,
682 struct dls_var_spec **first,
683 struct dls_var_spec **last)
685 struct fmt_list *head = NULL;
686 struct fmt_list *tail = NULL;
688 lex_force_match ('(');
692 struct fmt_list *new = xmalloc (sizeof *new);
695 /* Append new to list. */
703 if (lex_is_integer ())
705 new->count = lex_integer ();
711 /* Parse format specifier. */
714 new->f.type = FMT_DESCEND;
715 new->down = fixed_parse_fortran_internal (fx, first, last);
716 if (new->down == NULL)
719 else if (lex_match ('/'))
720 new->f.type = FMT_NEWREC;
721 else if (!parse_format_specifier (&new->f, FMTP_ALLOW_XT)
722 || !check_input_specifier (&new->f, 1))
727 lex_force_match (')');
732 destroy_fmt_list (head, 0);
737 /* Parses a FORTRAN-like format specification into the linked
738 list with head FIRST and tail LAST. Returns nonzero if
741 fixed_parse_fortran (struct fixed_parsing_state *fx,
742 struct dls_var_spec **first, struct dls_var_spec **last)
744 struct fmt_list *list;
747 list = fixed_parse_fortran_internal (fx, first, last);
752 dump_fmt_list (fx, list, first, last, &name_idx);
753 destroy_fmt_list (list, 1);
754 if (name_idx < fx->name_cnt)
756 msg (SE, _("There aren't enough format specifications "
757 "to match the number of variable names given."));
764 /* Displays a table giving information on fixed-format variable
765 parsing on DATA LIST. */
766 /* FIXME: The `Columns' column should be divided into three columns,
767 one for the starting column, one for the dash, one for the ending
768 column; then right-justify the starting column and left-justify the
771 dump_fixed_table (const struct dls_var_spec *specs,
772 const struct file_handle *fh, int rec_cnt)
774 const struct dls_var_spec *spec;
778 for (i = 0, spec = specs; spec; spec = spec->next)
780 t = tab_create (4, i + 1, 0);
781 tab_columns (t, TAB_COL_DOWN, 1);
782 tab_headers (t, 0, 0, 1, 0);
783 tab_text (t, 0, 0, TAB_CENTER | TAT_TITLE, _("Variable"));
784 tab_text (t, 1, 0, TAB_CENTER | TAT_TITLE, _("Record"));
785 tab_text (t, 2, 0, TAB_CENTER | TAT_TITLE, _("Columns"));
786 tab_text (t, 3, 0, TAB_CENTER | TAT_TITLE, _("Format"));
787 tab_box (t, TAL_1, TAL_1, TAL_0, TAL_1, 0, 0, 3, i);
788 tab_hline (t, TAL_2, 0, 3, 1);
789 tab_dim (t, tab_natural_dimensions);
791 for (i = 1, spec = specs; spec; spec = spec->next, i++)
793 tab_text (t, 0, i, TAB_LEFT, spec->v->name);
794 tab_text (t, 1, i, TAT_PRINTF, "%d", spec->rec);
795 tab_text (t, 2, i, TAT_PRINTF, "%3d-%3d",
797 tab_text (t, 3, i, TAB_LEFT | TAB_FIX,
798 fmt_to_string (&spec->input));
801 tab_title (t, ngettext ("Reading %d record from %s.",
802 "Reading %d records from %s.", rec_cnt),
803 rec_cnt, fh_get_name (fh));
807 /* Free-format parsing. */
809 /* Parses variable specifications for DATA LIST FREE and adds
810 them to the linked list with head FIRST and tail LAST.
811 Returns nonzero only if successful. */
813 parse_free (struct dls_var_spec **first, struct dls_var_spec **last)
818 struct fmt_spec input, output;
824 if (!parse_DATA_LIST_vars (&name, &name_cnt, PV_NONE))
829 if (!parse_format_specifier (&input, 0)
830 || !check_input_specifier (&input, 1)
831 || !lex_force_match (')'))
833 for (i = 0; i < name_cnt; i++)
838 convert_fmt_ItoO (&input, &output);
843 input = make_input_format (FMT_F, 8, 0);
844 output = *get_format ();
847 if (input.type == FMT_A || input.type == FMT_AHEX)
851 for (i = 0; i < name_cnt; i++)
853 struct dls_var_spec *spec;
856 v = dict_create_var (default_dict, name[i], width);
860 msg (SE, _("%s is a duplicate variable name."), name[i]);
863 v->print = v->write = output;
865 if (!in_input_program () && !in_file_type ())
868 spec = xmalloc (sizeof *spec);
872 str_copy_trunc (spec->name, sizeof spec->name, v->name);
873 append_var_spec (first, last, spec);
875 for (i = 0; i < name_cnt; i++)
880 return lex_end_of_command () == CMD_SUCCESS;
883 /* Displays a table giving information on free-format variable parsing
886 dump_free_table (const struct data_list_pgm *dls,
887 const struct file_handle *fh)
893 struct dls_var_spec *spec;
894 for (i = 0, spec = dls->first; spec; spec = spec->next)
898 t = tab_create (2, i + 1, 0);
899 tab_columns (t, TAB_COL_DOWN, 1);
900 tab_headers (t, 0, 0, 1, 0);
901 tab_text (t, 0, 0, TAB_CENTER | TAT_TITLE, _("Variable"));
902 tab_text (t, 1, 0, TAB_CENTER | TAT_TITLE, _("Format"));
903 tab_box (t, TAL_1, TAL_1, TAL_0, TAL_1, 0, 0, 1, i);
904 tab_hline (t, TAL_2, 0, 1, 1);
905 tab_dim (t, tab_natural_dimensions);
908 struct dls_var_spec *spec;
910 for (i = 1, spec = dls->first; spec; spec = spec->next, i++)
912 tab_text (t, 0, i, TAB_LEFT, spec->v->name);
913 tab_text (t, 1, i, TAB_LEFT | TAB_FIX, fmt_to_string (&spec->input));
917 tab_title (t, _("Reading free-form data from %s."), fh_get_name (fh));
922 /* Input procedure. */
924 /* Extracts a field from the current position in the current
925 record. Fields can be unquoted or quoted with single- or
926 double-quote characters. *FIELD is set to the field content.
927 After parsing the field, sets the current position in the
928 record to just past the field and any trailing delimiter.
929 END_BLANK is used internally; it should be initialized by the
930 caller to 0 and left alone afterward. Returns 0 on failure or
931 a 1-based column number indicating the beginning of the field
934 cut_field (const struct data_list_pgm *dls, struct fixed_string *field,
937 struct fixed_string line;
941 if (dfm_eof (dls->reader))
943 if (dls->delim_cnt == 0)
944 dfm_expand_tabs (dls->reader);
945 dfm_get_record (dls->reader, &line);
947 cp = ls_c_str (&line);
948 if (dls->delim_cnt == 0)
950 /* Skip leading whitespace. */
951 while (cp < ls_end (&line) && isspace ((unsigned char) *cp))
953 if (cp >= ls_end (&line))
956 /* Handle actual data, whether quoted or unquoted. */
957 if (*cp == '\'' || *cp == '"')
961 field->string = ++cp;
962 while (cp < ls_end (&line) && *cp != quote)
964 field->length = cp - field->string;
965 if (cp < ls_end (&line))
968 msg (SW, _("Quoted string missing terminating `%c'."), quote);
973 while (cp < ls_end (&line)
974 && !isspace ((unsigned char) *cp) && *cp != ',')
976 field->length = cp - field->string;
979 /* Skip trailing whitespace and a single comma if present. */
980 while (cp < ls_end (&line) && isspace ((unsigned char) *cp))
982 if (cp < ls_end (&line) && *cp == ',')
987 if (cp >= ls_end (&line))
989 int column = dfm_column_start (dls->reader);
990 /* A blank line or a line that ends in \t has a
991 trailing blank field. */
992 if (column == 1 || (column > 1 && cp[-1] == '\t'))
997 field->string = ls_end (&line);
999 dfm_forward_record (dls->reader);
1014 while (cp < ls_end (&line)
1015 && memchr (dls->delims, *cp, dls->delim_cnt) == NULL)
1017 field->length = cp - field->string;
1018 if (cp < ls_end (&line))
1023 dfm_forward_columns (dls->reader, field->string - line.string);
1024 column_start = dfm_column_start (dls->reader);
1026 dfm_forward_columns (dls->reader, cp - field->string);
1028 return column_start;
1031 static bool read_from_data_list_fixed (const struct data_list_pgm *,
1033 static bool read_from_data_list_free (const struct data_list_pgm *,
1035 static bool read_from_data_list_list (const struct data_list_pgm *,
1038 /* Reads a case from DLS into C.
1039 Returns true if successful, false at end of file or on I/O error. */
1041 read_from_data_list (const struct data_list_pgm *dls, struct ccase *c)
1045 dfm_push (dls->reader);
1049 retval = read_from_data_list_fixed (dls, c);
1052 retval = read_from_data_list_free (dls, c);
1055 retval = read_from_data_list_list (dls, c);
1060 dfm_pop (dls->reader);
1065 /* Reads a case from the data file into C, parsing it according
1066 to fixed-format syntax rules in DLS.
1067 Returns true if successful, false at end of file or on I/O error. */
1069 read_from_data_list_fixed (const struct data_list_pgm *dls, struct ccase *c)
1071 struct dls_var_spec *var_spec = dls->first;
1074 if (dfm_eof (dls->reader))
1076 for (i = 1; i <= dls->rec_cnt; i++)
1078 struct fixed_string line;
1080 if (dfm_eof (dls->reader))
1082 /* Note that this can't occur on the first record. */
1083 msg (SW, _("Partial case of %d of %d records discarded."),
1084 i - 1, dls->rec_cnt);
1087 dfm_expand_tabs (dls->reader);
1088 dfm_get_record (dls->reader, &line);
1090 for (; var_spec && i == var_spec->rec; var_spec = var_spec->next)
1094 data_in_finite_line (&di, ls_c_str (&line), ls_length (&line),
1095 var_spec->fc, var_spec->lc);
1096 di.v = case_data_rw (c, var_spec->fv);
1097 di.flags = DI_IMPLIED_DECIMALS;
1098 di.f1 = var_spec->fc;
1099 di.format = var_spec->input;
1104 dfm_forward_record (dls->reader);
1110 /* Reads a case from the data file into C, parsing it according
1111 to free-format syntax rules in DLS.
1112 Returns true if successful, false at end of file or on I/O error. */
1114 read_from_data_list_free (const struct data_list_pgm *dls, struct ccase *c)
1116 struct dls_var_spec *var_spec;
1119 for (var_spec = dls->first; var_spec; var_spec = var_spec->next)
1121 struct fixed_string field;
1124 /* Cut out a field and read in a new record if necessary. */
1127 column = cut_field (dls, &field, &end_blank);
1131 if (!dfm_eof (dls->reader))
1132 dfm_forward_record (dls->reader);
1133 if (dfm_eof (dls->reader))
1135 if (var_spec != dls->first)
1136 msg (SW, _("Partial case discarded. The first variable "
1137 "missing was %s."), var_spec->name);
1145 di.s = ls_c_str (&field);
1146 di.e = ls_end (&field);
1147 di.v = case_data_rw (c, var_spec->fv);
1150 di.format = var_spec->input;
1157 /* Reads a case from the data file and parses it according to
1158 list-format syntax rules.
1159 Returns true if successful, false at end of file or on I/O error. */
1161 read_from_data_list_list (const struct data_list_pgm *dls, struct ccase *c)
1163 struct dls_var_spec *var_spec;
1166 if (dfm_eof (dls->reader))
1169 for (var_spec = dls->first; var_spec; var_spec = var_spec->next)
1171 struct fixed_string field;
1174 /* Cut out a field and check for end-of-line. */
1175 column = cut_field (dls, &field, &end_blank);
1178 if (get_undefined ())
1179 msg (SW, _("Missing value(s) for all variables from %s onward. "
1180 "These will be filled with the system-missing value "
1181 "or blanks, as appropriate."),
1183 for (; var_spec; var_spec = var_spec->next)
1185 int width = get_format_var_width (&var_spec->input);
1187 case_data_rw (c, var_spec->fv)->f = SYSMIS;
1189 memset (case_data_rw (c, var_spec->fv)->s, ' ', width);
1197 di.s = ls_c_str (&field);
1198 di.e = ls_end (&field);
1199 di.v = case_data_rw (c, var_spec->fv);
1202 di.format = var_spec->input;
1207 dfm_forward_record (dls->reader);
1211 /* Destroys SPEC. */
1213 destroy_dls_var_spec (struct dls_var_spec *spec)
1215 struct dls_var_spec *next;
1217 while (spec != NULL)
1225 /* Destroys DATA LIST transformation DLS.
1226 Returns true if successful, false if an I/O error occurred. */
1228 data_list_trns_free (void *dls_)
1230 struct data_list_pgm *dls = dls_;
1232 destroy_dls_var_spec (dls->first);
1233 dfm_close_reader (dls->reader);
1238 /* Handle DATA LIST transformation DLS, parsing data into C. */
1240 data_list_trns_proc (void *dls_, struct ccase *c, int case_num UNUSED)
1242 struct data_list_pgm *dls = dls_;
1245 if (read_from_data_list (dls, c))
1246 retval = TRNS_CONTINUE;
1247 else if (dfm_reader_error (dls->reader) || dfm_eof (dls->reader) > 1)
1249 /* An I/O error, or encountering end of file for a second
1250 time, should be escalated into a more serious error. */
1251 retval = TRNS_ERROR;
1254 retval = TRNS_DROP_CASE;
1256 /* If there was an END subcommand handle it. */
1257 if (dls->end != NULL)
1259 double *end = &case_data_rw (c, dls->end->fv)->f;
1260 if (retval == TRNS_DROP_CASE)
1263 retval = TRNS_CONTINUE;
1272 /* Reads all the records from the data file and passes them to
1274 Returns true if successful, false if an I/O error occurred. */
1276 data_list_source_read (struct case_source *source,
1278 write_case_func *write_case, write_case_data wc_data)
1280 struct data_list_pgm *dls = source->aux;
1286 if (!read_from_data_list (dls, c))
1287 return !dfm_reader_error (dls->reader);
1289 dfm_push (dls->reader);
1290 ok = write_case (wc_data);
1291 dfm_pop (dls->reader);
1297 /* Destroys the source's internal data. */
1299 data_list_source_destroy (struct case_source *source)
1301 data_list_trns_free (source->aux);
1304 static const struct case_source_class data_list_source_class =
1308 data_list_source_read,
1309 data_list_source_destroy,
1312 /* REPEATING DATA. */
1314 /* Represents a number or a variable. */
1315 struct rpd_num_or_var
1317 int num; /* Value, or 0. */
1318 struct variable *var; /* Variable, if number==0. */
1321 /* REPEATING DATA private data structure. */
1322 struct repeating_data_trns
1324 struct dls_var_spec *first, *last; /* Variable parsing specifications. */
1325 struct dfm_reader *reader; /* Input file, never NULL. */
1327 struct rpd_num_or_var starts_beg; /* STARTS=, before the dash. */
1328 struct rpd_num_or_var starts_end; /* STARTS=, after the dash. */
1329 struct rpd_num_or_var occurs; /* OCCURS= subcommand. */
1330 struct rpd_num_or_var length; /* LENGTH= subcommand. */
1331 struct rpd_num_or_var cont_beg; /* CONTINUED=, before the dash. */
1332 struct rpd_num_or_var cont_end; /* CONTINUED=, after the dash. */
1334 /* ID subcommand. */
1335 int id_beg, id_end; /* Beginning & end columns. */
1336 struct variable *id_var; /* DATA LIST variable. */
1337 struct fmt_spec id_spec; /* Input format spec. */
1338 union value *id_value; /* ID value. */
1340 write_case_func *write_case;
1341 write_case_data wc_data;
1344 static trns_free_func repeating_data_trns_free;
1345 static int parse_num_or_var (struct rpd_num_or_var *, const char *);
1346 static int parse_repeating_data (struct dls_var_spec **,
1347 struct dls_var_spec **);
1348 static void find_variable_input_spec (struct variable *v,
1349 struct fmt_spec *spec);
1351 int cmd_repeating_data (void);
1353 /* Parses the REPEATING DATA command. */
1355 cmd_repeating_data (void)
1357 struct repeating_data_trns *rpd;
1358 int table = 1; /* Print table? */
1359 bool saw_starts = false; /* Saw STARTS subcommand? */
1360 bool saw_occurs = false; /* Saw OCCURS subcommand? */
1361 bool saw_length = false; /* Saw LENGTH subcommand? */
1362 bool saw_continued = false; /* Saw CONTINUED subcommand? */
1363 bool saw_id = false; /* Saw ID subcommand? */
1364 struct file_handle *const fh = fh_get_default_handle ();
1366 assert (in_input_program () || in_file_type ());
1368 rpd = xmalloc (sizeof *rpd);
1369 rpd->reader = dfm_open_reader (fh);
1370 rpd->first = rpd->last = NULL;
1371 rpd->starts_beg.num = 0;
1372 rpd->starts_beg.var = NULL;
1373 rpd->starts_end = rpd->occurs = rpd->length = rpd->cont_beg
1374 = rpd->cont_end = rpd->starts_beg;
1375 rpd->id_beg = rpd->id_end = 0;
1377 rpd->id_value = NULL;
1383 if (lex_match_id ("FILE"))
1385 struct file_handle *file;
1387 file = fh_parse (FH_REF_FILE | FH_REF_INLINE);
1392 msg (SE, _("REPEATING DATA must use the same file as its "
1393 "corresponding DATA LIST or FILE TYPE."));
1397 else if (lex_match_id ("STARTS"))
1402 msg (SE, _("%s subcommand given multiple times."),"STARTS");
1407 if (!parse_num_or_var (&rpd->starts_beg, "STARTS beginning column"))
1410 lex_negative_to_dash ();
1411 if (lex_match ('-'))
1413 if (!parse_num_or_var (&rpd->starts_end, "STARTS ending column"))
1416 /* Otherwise, rpd->starts_end is uninitialized. We
1417 will initialize it later from the record length
1418 of the file. We can't do so now because the
1419 file handle may not be specified yet. */
1422 if (rpd->starts_beg.num != 0 && rpd->starts_end.num != 0
1423 && rpd->starts_beg.num > rpd->starts_end.num)
1425 msg (SE, _("STARTS beginning column (%d) exceeds "
1426 "STARTS ending column (%d)."),
1427 rpd->starts_beg.num, rpd->starts_end.num);
1431 else if (lex_match_id ("OCCURS"))
1436 msg (SE, _("%s subcommand given multiple times."),"OCCURS");
1441 if (!parse_num_or_var (&rpd->occurs, "OCCURS"))
1444 else if (lex_match_id ("LENGTH"))
1449 msg (SE, _("%s subcommand given multiple times."),"LENGTH");
1454 if (!parse_num_or_var (&rpd->length, "LENGTH"))
1457 else if (lex_match_id ("CONTINUED"))
1462 msg (SE, _("%s subcommand given multiple times."),"CONTINUED");
1465 saw_continued = true;
1467 if (!lex_match ('/'))
1469 if (!parse_num_or_var (&rpd->cont_beg,
1470 "CONTINUED beginning column"))
1473 lex_negative_to_dash ();
1475 && !parse_num_or_var (&rpd->cont_end,
1476 "CONTINUED ending column"))
1479 if (rpd->cont_beg.num != 0 && rpd->cont_end.num != 0
1480 && rpd->cont_beg.num > rpd->cont_end.num)
1482 msg (SE, _("CONTINUED beginning column (%d) exceeds "
1483 "CONTINUED ending column (%d)."),
1484 rpd->cont_beg.num, rpd->cont_end.num);
1489 rpd->cont_beg.num = 1;
1491 else if (lex_match_id ("ID"))
1496 msg (SE, _("%s subcommand given multiple times."),"ID");
1501 if (!lex_force_int ())
1503 if (lex_integer () < 1)
1505 msg (SE, _("ID beginning column (%ld) must be positive."),
1509 rpd->id_beg = lex_integer ();
1512 lex_negative_to_dash ();
1514 if (lex_match ('-'))
1516 if (!lex_force_int ())
1518 if (lex_integer () < 1)
1520 msg (SE, _("ID ending column (%ld) must be positive."),
1524 if (lex_integer () < rpd->id_end)
1526 msg (SE, _("ID ending column (%ld) cannot be less than "
1527 "ID beginning column (%d)."),
1528 lex_integer (), rpd->id_beg);
1532 rpd->id_end = lex_integer ();
1535 else rpd->id_end = rpd->id_beg;
1537 if (!lex_force_match ('='))
1539 rpd->id_var = parse_variable ();
1540 if (rpd->id_var == NULL)
1543 find_variable_input_spec (rpd->id_var, &rpd->id_spec);
1544 rpd->id_value = xnmalloc (rpd->id_var->nv, sizeof *rpd->id_value);
1546 else if (lex_match_id ("TABLE"))
1548 else if (lex_match_id ("NOTABLE"))
1550 else if (lex_match_id ("DATA"))
1558 if (!lex_force_match ('/'))
1562 /* Comes here when DATA specification encountered. */
1563 if (!saw_starts || !saw_occurs)
1566 msg (SE, _("Missing required specification STARTS."));
1568 msg (SE, _("Missing required specification OCCURS."));
1572 /* Enforce ID restriction. */
1573 if (saw_id && !saw_continued)
1575 msg (SE, _("ID specified without CONTINUED."));
1579 /* Calculate and check starts_end, cont_end if necessary. */
1580 if (rpd->starts_end.num == 0 && rpd->starts_end.var == NULL)
1582 rpd->starts_end.num = fh_get_record_width (fh);
1583 if (rpd->starts_beg.num != 0
1584 && rpd->starts_beg.num > rpd->starts_end.num)
1586 msg (SE, _("STARTS beginning column (%d) exceeds "
1587 "default STARTS ending column taken from file's "
1588 "record width (%d)."),
1589 rpd->starts_beg.num, rpd->starts_end.num);
1593 if (rpd->cont_end.num == 0 && rpd->cont_end.var == NULL)
1595 rpd->cont_end.num = fh_get_record_width (fh);
1596 if (rpd->cont_beg.num != 0
1597 && rpd->cont_beg.num > rpd->cont_end.num)
1599 msg (SE, _("CONTINUED beginning column (%d) exceeds "
1600 "default CONTINUED ending column taken from file's "
1601 "record width (%d)."),
1602 rpd->cont_beg.num, rpd->cont_end.num);
1608 if (!parse_repeating_data (&rpd->first, &rpd->last))
1611 /* Calculate length if necessary. */
1614 struct dls_var_spec *iter;
1616 for (iter = rpd->first; iter; iter = iter->next)
1617 if (iter->lc > rpd->length.num)
1618 rpd->length.num = iter->lc;
1619 assert (rpd->length.num != 0);
1623 dump_fixed_table (rpd->first, fh, rpd->last->rec);
1625 add_transformation (repeating_data_trns_proc, repeating_data_trns_free, rpd);
1627 return lex_end_of_command ();
1630 repeating_data_trns_free (rpd);
1631 return CMD_CASCADING_FAILURE;
1634 /* Finds the input format specification for variable V and puts
1635 it in SPEC. Because of the way that DATA LIST is structured,
1636 this is nontrivial. */
1638 find_variable_input_spec (struct variable *v, struct fmt_spec *spec)
1642 for (i = 0; i < n_trns; i++)
1644 struct transformation *trns = &t_trns[i];
1646 if (trns->proc == data_list_trns_proc)
1648 struct data_list_pgm *pgm = trns->private;
1649 struct dls_var_spec *iter;
1651 for (iter = pgm->first; iter; iter = iter->next)
1654 *spec = iter->input;
1663 /* Parses a number or a variable name from the syntax file and puts
1664 the results in VALUE. Ensures that the number is at least 1; else
1665 emits an error based on MESSAGE. Returns nonzero only if
1668 parse_num_or_var (struct rpd_num_or_var *value, const char *message)
1673 value->var = parse_variable ();
1674 if (value->var == NULL)
1676 if (value->var->type == ALPHA)
1678 msg (SE, _("String variable not allowed here."));
1682 else if (lex_is_integer ())
1684 value->num = lex_integer ();
1688 msg (SE, _("%s (%d) must be at least 1."), message, value->num);
1694 msg (SE, _("Variable or integer expected for %s."), message);
1700 /* Parses data specifications for repeating data groups, adding
1701 them to the linked list with head FIRST and tail LAST.
1702 Returns nonzero only if successful. */
1704 parse_repeating_data (struct dls_var_spec **first, struct dls_var_spec **last)
1706 struct fixed_parsing_state fx;
1712 while (token != '.')
1714 if (!parse_DATA_LIST_vars (&fx.name, &fx.name_cnt, PV_NONE))
1717 if (lex_is_number ())
1719 if (!fixed_parse_compatible (&fx, first, last))
1722 else if (token == '(')
1724 if (!fixed_parse_fortran (&fx, first, last))
1729 msg (SE, _("SPSS-like or FORTRAN-like format "
1730 "specification expected after variable names."));
1734 for (i = 0; i < fx.name_cnt; i++)
1742 for (i = 0; i < fx.name_cnt; i++)
1748 /* Obtains the real value for rpd_num_or_var N in case C and returns
1749 it. The valid range is nonnegative numbers, but numbers outside
1750 this range can be returned and should be handled by the caller as
1753 realize_value (struct rpd_num_or_var *n, struct ccase *c)
1757 double v = case_num (c, n->var->fv);
1758 return v != SYSMIS && v >= INT_MIN && v <= INT_MAX ? v : -1;
1764 /* Parameter record passed to rpd_parse_record(). */
1765 struct rpd_parse_info
1767 struct repeating_data_trns *trns; /* REPEATING DATA transformation. */
1768 const char *line; /* Line being parsed. */
1769 size_t len; /* Line length. */
1770 int beg, end; /* First and last column of first occurrence. */
1771 int ofs; /* Column offset between repeated occurrences. */
1772 struct ccase *c; /* Case to fill in. */
1773 int verify_id; /* Zero to initialize ID, nonzero to verify it. */
1774 int max_occurs; /* Max number of occurrences to parse. */
1777 /* Parses one record of repeated data and outputs corresponding
1778 cases. Returns number of occurrences parsed up to the
1779 maximum specified in INFO. */
1781 rpd_parse_record (const struct rpd_parse_info *info)
1783 struct repeating_data_trns *t = info->trns;
1784 int cur = info->beg;
1787 /* Handle record ID values. */
1790 union value id_temp[MAX_ELEMS_PER_VALUE];
1792 /* Parse record ID into V. */
1796 data_in_finite_line (&di, info->line, info->len, t->id_beg, t->id_end);
1797 di.v = info->verify_id ? id_temp : t->id_value;
1800 di.format = t->id_spec;
1807 && compare_values (id_temp, t->id_value, t->id_var->width) != 0)
1809 char expected_str [MAX_FORMATTED_LEN + 1];
1810 char actual_str [MAX_FORMATTED_LEN + 1];
1812 data_out (expected_str, &t->id_var->print, t->id_value);
1813 expected_str[t->id_var->print.w] = '\0';
1815 data_out (actual_str, &t->id_var->print, id_temp);
1816 actual_str[t->id_var->print.w] = '\0';
1819 _("Encountered mismatched record ID \"%s\" "
1820 "expecting \"%s\"."),
1821 actual_str, expected_str);
1827 /* Iterate over the set of expected occurrences and record each of
1828 them as a separate case. FIXME: We need to execute any
1829 transformations that follow the current one. */
1833 for (occurrences = 0; occurrences < info->max_occurs; )
1835 if (cur + info->ofs > info->end + 1)
1840 struct dls_var_spec *var_spec = t->first;
1842 for (; var_spec; var_spec = var_spec->next)
1844 int fc = var_spec->fc - 1 + cur;
1845 int lc = var_spec->lc - 1 + cur;
1847 if (fc > info->len && !warned && var_spec->input.type != FMT_A)
1852 _("Variable %s starting in column %d extends "
1853 "beyond physical record length of %d."),
1854 var_spec->v->name, fc, info->len);
1860 data_in_finite_line (&di, info->line, info->len, fc, lc);
1861 di.v = case_data_rw (info->c, var_spec->fv);
1864 di.format = var_spec->input;
1874 if (!t->write_case (t->wc_data))
1882 /* Reads one set of repetitions of the elements in the REPEATING
1883 DATA structure. Returns TRNS_CONTINUE on success,
1884 TRNS_DROP_CASE on end of file or on failure, or TRNS_NEXT_CASE. */
1886 repeating_data_trns_proc (void *trns_, struct ccase *c, int case_num UNUSED)
1888 struct repeating_data_trns *t = trns_;
1890 struct fixed_string line; /* Current record. */
1892 int starts_beg; /* Starting column. */
1893 int starts_end; /* Ending column. */
1894 int occurs; /* Number of repetitions. */
1895 int length; /* Length of each occurrence. */
1896 int cont_beg; /* Starting column for continuation lines. */
1897 int cont_end; /* Ending column for continuation lines. */
1899 int occurs_left; /* Number of occurrences remaining. */
1901 int code; /* Return value from rpd_parse_record(). */
1903 int skip_first_record = 0;
1905 dfm_push (t->reader);
1907 /* Read the current record. */
1908 dfm_reread_record (t->reader, 1);
1909 dfm_expand_tabs (t->reader);
1910 if (dfm_eof (t->reader))
1911 return TRNS_DROP_CASE;
1912 dfm_get_record (t->reader, &line);
1913 dfm_forward_record (t->reader);
1915 /* Calculate occurs, length. */
1916 occurs_left = occurs = realize_value (&t->occurs, c);
1919 rpd_msg (SE, _("Invalid value %d for OCCURS."), occurs);
1920 return TRNS_NEXT_CASE;
1922 starts_beg = realize_value (&t->starts_beg, c);
1923 if (starts_beg <= 0)
1925 rpd_msg (SE, _("Beginning column for STARTS (%d) must be at least 1."),
1927 return TRNS_NEXT_CASE;
1929 starts_end = realize_value (&t->starts_end, c);
1930 if (starts_end < starts_beg)
1932 rpd_msg (SE, _("Ending column for STARTS (%d) is less than "
1933 "beginning column (%d)."),
1934 starts_end, starts_beg);
1935 skip_first_record = 1;
1937 length = realize_value (&t->length, c);
1940 rpd_msg (SE, _("Invalid value %d for LENGTH."), length);
1942 occurs = occurs_left = 1;
1944 cont_beg = realize_value (&t->cont_beg, c);
1947 rpd_msg (SE, _("Beginning column for CONTINUED (%d) must be "
1950 return TRNS_DROP_CASE;
1952 cont_end = realize_value (&t->cont_end, c);
1953 if (cont_end < cont_beg)
1955 rpd_msg (SE, _("Ending column for CONTINUED (%d) is less than "
1956 "beginning column (%d)."),
1957 cont_end, cont_beg);
1958 return TRNS_DROP_CASE;
1961 /* Parse the first record. */
1962 if (!skip_first_record)
1964 struct rpd_parse_info info;
1966 info.line = ls_c_str (&line);
1967 info.len = ls_length (&line);
1968 info.beg = starts_beg;
1969 info.end = starts_end;
1973 info.max_occurs = occurs_left;
1974 code = rpd_parse_record (&info);
1976 return TRNS_DROP_CASE;
1977 occurs_left -= code;
1979 else if (cont_beg == 0)
1980 return TRNS_NEXT_CASE;
1982 /* Make sure, if some occurrences are left, that we have
1983 continuation records. */
1984 if (occurs_left > 0 && cont_beg == 0)
1987 _("Number of repetitions specified on OCCURS (%d) "
1988 "exceed number of repetitions available in "
1989 "space on STARTS (%d), and CONTINUED not specified."),
1990 occurs, (starts_end - starts_beg + 1) / length);
1991 return TRNS_DROP_CASE;
1994 /* Go on to additional records. */
1995 while (occurs_left != 0)
1997 struct rpd_parse_info info;
1999 assert (occurs_left >= 0);
2001 /* Read in another record. */
2002 if (dfm_eof (t->reader))
2005 _("Unexpected end of file with %d repetitions "
2006 "remaining out of %d."),
2007 occurs_left, occurs);
2008 return TRNS_DROP_CASE;
2010 dfm_expand_tabs (t->reader);
2011 dfm_get_record (t->reader, &line);
2012 dfm_forward_record (t->reader);
2014 /* Parse this record. */
2016 info.line = ls_c_str (&line);
2017 info.len = ls_length (&line);
2018 info.beg = cont_beg;
2019 info.end = cont_end;
2023 info.max_occurs = occurs_left;
2024 code = rpd_parse_record (&info);;
2026 return TRNS_DROP_CASE;
2027 occurs_left -= code;
2030 dfm_pop (t->reader);
2032 /* FIXME: This is a kluge until we've implemented multiplexing of
2034 return TRNS_NEXT_CASE;
2037 /* Frees a REPEATING DATA transformation.
2038 Returns true if successful, false if an I/O error occurred. */
2040 repeating_data_trns_free (void *rpd_)
2042 struct repeating_data_trns *rpd = rpd_;
2044 destroy_dls_var_spec (rpd->first);
2045 dfm_close_reader (rpd->reader);
2046 free (rpd->id_value);
2051 /* Lets repeating_data_trns_proc() know how to write the cases
2052 that it composes. Not elegant. */
2054 repeating_data_set_write_case (struct transformation *trns_,
2055 write_case_func *write_case,
2056 write_case_data wc_data)
2058 struct repeating_data_trns *t = trns_->private;
2060 assert (trns_->proc == repeating_data_trns_proc);
2061 t->write_case = write_case;
2062 t->wc_data = wc_data;
2065 /* Reports a message in CLASS with the given FORMAT as text,
2066 prefixing the message with "REPEATING DATA: " to make the
2069 rpd_msg (enum msg_class class, const char *format, ...)
2075 ds_create (&text, "REPEATING DATA: ");
2076 va_start (args, format);
2077 ds_vprintf (&text, format, args);
2080 m.category = msg_class_to_category (class);
2081 m.severity = msg_class_to_severity (class);
2082 m.where.file_name = NULL;
2083 m.where.line_number = 0;
2084 m.text = ds_c_str (&text);