1 /* PSPP - computes sample statistics.
2 Copyright (C) 1997-9, 2000, 2006 Free Software Foundation, Inc.
3 Written by Ben Pfaff <blp@gnu.org>.
5 This program is free software; you can redistribute it and/or
6 modify it under the terms of the GNU General Public License as
7 published by the Free Software Foundation; either version 2 of the
8 License, or (at your option) any later version.
10 This program is distributed in the hope that it will be useful, but
11 WITHOUT ANY WARRANTY; without even the implied warranty of
12 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
13 General Public License for more details.
15 You should have received a copy of the GNU General Public License
16 along with this program; if not, write to the Free Software
17 Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
21 #include <language/data-io/data-list.h>
22 #include <libpspp/message.h>
27 #include <libpspp/alloc.h>
28 #include <data/case.h>
29 #include <language/command.h>
30 #include <libpspp/compiler.h>
31 #include <data/data-in.h>
32 #include <language/data-io/data-reader.h>
33 #include <data/dictionary.h>
34 #include <libpspp/message.h>
35 #include <language/data-io/file-handle.h>
36 #include <data/format.h>
37 #include <language/lexer/lexer.h>
38 #include <libpspp/misc.h>
39 #include <data/settings.h>
40 #include <libpspp/str.h>
41 #include <output/table.h>
42 #include <data/variable.h>
43 #include <procedure.h>
45 #include "data-list.h"
48 #define _(msgid) gettext (msgid)
50 /* Utility function. */
52 /* FIXME: Either REPEATING DATA must be the last transformation, or we
53 must multiplex the transformations that follow (i.e., perform them
54 for every case that we produce from a repetition instance).
55 Currently we do neither. We should do one or the other. */
57 /* Describes how to parse one variable. */
60 struct dls_var_spec *next; /* Next specification in list. */
62 /* Both free and fixed formats. */
63 struct fmt_spec input; /* Input format of this field. */
64 struct variable *v; /* Associated variable. Used only in
65 parsing. Not safe later. */
66 int fv; /* First value in case. */
68 /* Fixed format only. */
69 int rec; /* Record number (1-based). */
70 int fc, lc; /* Column numbers in record. */
72 /* Free format only. */
73 char name[LONG_NAME_LEN + 1]; /* Name of variable. */
76 /* Constants for DATA LIST type. */
77 /* Must match table in cmd_data_list(). */
85 /* DATA LIST private data structure. */
88 struct dls_var_spec *first, *last; /* Variable parsing specifications. */
89 struct dfm_reader *reader; /* Data file reader. */
91 int type; /* A DLS_* constant. */
92 struct variable *end; /* Variable specified on END subcommand. */
93 int rec_cnt; /* Number of records. */
94 size_t case_size; /* Case size in bytes. */
95 char *delims; /* Delimiters if any; not null-terminated. */
96 size_t delim_cnt; /* Number of delimiter, or 0 for spaces. */
99 static const struct case_source_class data_list_source_class;
101 static int parse_fixed (struct data_list_pgm *);
102 static int parse_free (struct dls_var_spec **, struct dls_var_spec **);
103 static void dump_fixed_table (const struct dls_var_spec *,
104 const struct file_handle *, int rec_cnt);
105 static void dump_free_table (const struct data_list_pgm *,
106 const struct file_handle *);
107 static void destroy_dls_var_spec (struct dls_var_spec *);
108 static trns_free_func data_list_trns_free;
109 static trns_proc_func data_list_trns_proc;
111 /* Message title for REPEATING DATA. */
112 #define RPD_ERR "REPEATING DATA: "
117 struct data_list_pgm *dls;
118 int table = -1; /* Print table if nonzero, -1=undecided. */
119 struct file_handle *fh = fh_inline_file ();
121 if (!case_source_is_complex (vfm_source))
122 discard_variables ();
124 dls = xmalloc (sizeof *dls);
131 dls->first = dls->last = NULL;
135 if (lex_match_id ("FILE"))
138 fh = fh_parse (FH_REF_FILE | FH_REF_INLINE);
141 if (case_source_is_class (vfm_source, &file_type_source_class)
142 && fh != fh_get_default_handle ())
144 msg (SE, _("DATA LIST must use the same file "
145 "as the enclosing FILE TYPE."));
149 else if (lex_match_id ("RECORDS"))
153 if (!lex_force_int ())
155 dls->rec_cnt = lex_integer ();
159 else if (lex_match_id ("END"))
163 msg (SE, _("The END subcommand may only be specified once."));
168 if (!lex_force_id ())
170 dls->end = dict_lookup_var (default_dict, tokid);
172 dls->end = dict_create_var_assert (default_dict, tokid, 0);
175 else if (token == T_ID)
177 if (lex_match_id ("NOTABLE"))
179 else if (lex_match_id ("TABLE"))
184 if (lex_match_id ("FIXED"))
186 else if (lex_match_id ("FREE"))
188 else if (lex_match_id ("LIST"))
198 msg (SE, _("Only one of FIXED, FREE, or LIST may "
204 if ((dls->type == DLS_FREE || dls->type == DLS_LIST)
207 while (!lex_match (')'))
211 if (lex_match_id ("TAB"))
213 else if (token == T_STRING && tokstr.length == 1)
215 delim = tokstr.string[0];
224 dls->delims = xrealloc (dls->delims, dls->delim_cnt + 1);
225 dls->delims[dls->delim_cnt++] = delim;
239 dls->case_size = dict_get_case_size (default_dict);
240 fh_set_default_handle (fh);
243 dls->type = DLS_FIXED;
247 if (dls->type == DLS_FREE)
253 if (dls->type == DLS_FIXED)
255 if (!parse_fixed (dls))
258 dump_fixed_table (dls->first, fh, dls->rec_cnt);
262 if (!parse_free (&dls->first, &dls->last))
265 dump_free_table (dls, fh);
268 dls->reader = dfm_open_reader (fh);
269 if (dls->reader == NULL)
272 if (vfm_source != NULL)
273 add_transformation (data_list_trns_proc, data_list_trns_free, dls);
275 vfm_source = create_case_source (&data_list_source_class, dls);
280 data_list_trns_free (dls);
281 return CMD_CASCADING_FAILURE;
284 /* Adds SPEC to the linked list with head at FIRST and tail at
287 append_var_spec (struct dls_var_spec **first, struct dls_var_spec **last,
288 struct dls_var_spec *spec)
295 (*last)->next = spec;
299 /* Fixed-format parsing. */
301 /* Used for chaining together fortran-like format specifiers. */
304 struct fmt_list *next;
307 struct fmt_list *down;
310 /* State of parsing DATA LIST. */
311 struct fixed_parsing_state
313 char **name; /* Variable names. */
314 size_t name_cnt; /* Number of names. */
316 int recno; /* Index of current record. */
317 int sc; /* 1-based column number of starting column for
318 next field to output. */
321 static int fixed_parse_compatible (struct fixed_parsing_state *,
322 struct dls_var_spec **,
323 struct dls_var_spec **);
324 static int fixed_parse_fortran (struct fixed_parsing_state *,
325 struct dls_var_spec **,
326 struct dls_var_spec **);
328 /* Parses all the variable specifications for DATA LIST FIXED,
329 storing them into DLS. Returns nonzero if successful. */
331 parse_fixed (struct data_list_pgm *dls)
333 struct fixed_parsing_state fx;
341 while (lex_match ('/'))
344 if (lex_is_integer ())
346 if (lex_integer () < fx.recno)
348 msg (SE, _("The record number specified, %ld, is "
349 "before the previous record, %d. Data "
350 "fields must be listed in order of "
351 "increasing record number."),
352 lex_integer (), fx.recno - 1);
356 fx.recno = lex_integer ();
362 if (!parse_DATA_LIST_vars (&fx.name, &fx.name_cnt, PV_NONE))
365 if (lex_is_number ())
367 if (!fixed_parse_compatible (&fx, &dls->first, &dls->last))
370 else if (token == '(')
372 if (!fixed_parse_fortran (&fx, &dls->first, &dls->last))
377 msg (SE, _("SPSS-like or FORTRAN-like format "
378 "specification expected after variable names."));
382 for (i = 0; i < fx.name_cnt; i++)
386 if (dls->first == NULL)
388 msg (SE, _("At least one variable must be specified."));
391 if (dls->rec_cnt && dls->last->rec > dls->rec_cnt)
393 msg (SE, _("Variables are specified on records that "
394 "should not exist according to RECORDS subcommand."));
397 else if (!dls->rec_cnt)
398 dls->rec_cnt = dls->last->rec;
399 return lex_end_of_command () == CMD_SUCCESS;
402 for (i = 0; i < fx.name_cnt; i++)
408 /* Parses a variable specification in the form 1-10 (A) based on
409 FX and adds specifications to the linked list with head at
410 FIRST and tail at LAST. */
412 fixed_parse_compatible (struct fixed_parsing_state *fx,
413 struct dls_var_spec **first, struct dls_var_spec **last)
415 struct fmt_spec input;
421 if (!lex_force_int ())
426 msg (SE, _("Column positions for fields must be positive."));
432 lex_negative_to_dash ();
435 if (!lex_force_int ())
440 msg (SE, _("Column positions for fields must be positive."));
445 msg (SE, _("The ending column for a field must be "
446 "greater than the starting column."));
455 /* Divide columns evenly. */
456 input.w = (lc - fc + 1) / fx->name_cnt;
457 if ((lc - fc + 1) % fx->name_cnt)
459 msg (SE, _("The %d columns %d-%d "
460 "can't be evenly divided into %d fields."),
461 lc - fc + 1, fc, lc, fx->name_cnt);
465 /* Format specifier. */
468 struct fmt_desc *fdp;
474 input.type = parse_format_specifier_name (&cp, 0);
475 if (input.type == -1)
479 msg (SE, _("A format specifier on this line "
480 "has extra characters on the end."));
490 if (lex_is_integer ())
492 if (lex_integer () < 1)
494 msg (SE, _("The value for number of decimal places "
495 "must be at least 1."));
499 input.d = lex_integer ();
505 fdp = &formats[input.type];
506 if (fdp->n_args < 2 && input.d)
508 msg (SE, _("Input format %s doesn't accept decimal places."),
516 if (!lex_force_match (')'))
524 if (!check_input_specifier (&input, 1))
527 /* Start column for next specification. */
530 /* Width of variables to create. */
531 if (input.type == FMT_A || input.type == FMT_AHEX)
536 /* Create variables and var specs. */
537 for (i = 0; i < fx->name_cnt; i++)
539 struct dls_var_spec *spec;
542 v = dict_create_var (default_dict, fx->name[i], width);
545 convert_fmt_ItoO (&input, &v->print);
547 if (!case_source_is_complex (vfm_source))
552 v = dict_lookup_var_assert (default_dict, fx->name[i]);
553 if (vfm_source == NULL)
555 msg (SE, _("%s is a duplicate variable name."), fx->name[i]);
558 if ((width != 0) != (v->width != 0))
560 msg (SE, _("There is already a variable %s of a "
565 if (width != 0 && width != v->width)
567 msg (SE, _("There is already a string variable %s of a "
568 "different width."), fx->name[i]);
573 spec = xmalloc (sizeof *spec);
577 spec->rec = fx->recno;
578 spec->fc = fc + input.w * i;
579 spec->lc = spec->fc + input.w - 1;
580 append_var_spec (first, last, spec);
585 /* Destroy format list F and, if RECURSE is nonzero, all its
588 destroy_fmt_list (struct fmt_list *f, int recurse)
590 struct fmt_list *next;
595 if (recurse && f->f.type == FMT_DESCEND)
596 destroy_fmt_list (f->down, 1);
601 /* Takes a hierarchically structured fmt_list F as constructed by
602 fixed_parse_fortran(), and flattens it, adding the variable
603 specifications to the linked list with head FIRST and tail
604 LAST. NAME_IDX is used to take values from the list of names
605 in FX; it should initially point to a value of 0. */
607 dump_fmt_list (struct fixed_parsing_state *fx, struct fmt_list *f,
608 struct dls_var_spec **first, struct dls_var_spec **last,
613 for (; f; f = f->next)
614 if (f->f.type == FMT_X)
616 else if (f->f.type == FMT_T)
618 else if (f->f.type == FMT_NEWREC)
620 fx->recno += f->count;
624 for (i = 0; i < f->count; i++)
625 if (f->f.type == FMT_DESCEND)
627 if (!dump_fmt_list (fx, f->down, first, last, name_idx))
632 struct dls_var_spec *spec;
636 if (formats[f->f.type].cat & FCAT_STRING)
640 if (*name_idx >= fx->name_cnt)
642 msg (SE, _("The number of format "
643 "specifications exceeds the given number of "
648 v = dict_create_var (default_dict, fx->name[(*name_idx)++], width);
651 msg (SE, _("%s is a duplicate variable name."), fx->name[i]);
655 if (!case_source_is_complex (vfm_source))
658 spec = xmalloc (sizeof *spec);
662 spec->rec = fx->recno;
664 spec->lc = fx->sc + f->f.w - 1;
665 append_var_spec (first, last, spec);
667 convert_fmt_ItoO (&spec->input, &v->print);
675 /* Recursively parses a FORTRAN-like format specification into
676 the linked list with head FIRST and tail TAIL. LEVEL is the
677 level of recursion, starting from 0. Returns the parsed
678 specification if successful, or a null pointer on failure. */
679 static struct fmt_list *
680 fixed_parse_fortran_internal (struct fixed_parsing_state *fx,
681 struct dls_var_spec **first,
682 struct dls_var_spec **last)
684 struct fmt_list *head = NULL;
685 struct fmt_list *tail = NULL;
687 lex_force_match ('(');
691 struct fmt_list *new = xmalloc (sizeof *new);
694 /* Append new to list. */
702 if (lex_is_integer ())
704 new->count = lex_integer ();
710 /* Parse format specifier. */
713 new->f.type = FMT_DESCEND;
714 new->down = fixed_parse_fortran_internal (fx, first, last);
715 if (new->down == NULL)
718 else if (lex_match ('/'))
719 new->f.type = FMT_NEWREC;
720 else if (!parse_format_specifier (&new->f, FMTP_ALLOW_XT)
721 || !check_input_specifier (&new->f, 1))
726 lex_force_match (')');
731 destroy_fmt_list (head, 0);
736 /* Parses a FORTRAN-like format specification into the linked
737 list with head FIRST and tail LAST. Returns nonzero if
740 fixed_parse_fortran (struct fixed_parsing_state *fx,
741 struct dls_var_spec **first, struct dls_var_spec **last)
743 struct fmt_list *list;
746 list = fixed_parse_fortran_internal (fx, first, last);
751 dump_fmt_list (fx, list, first, last, &name_idx);
752 destroy_fmt_list (list, 1);
753 if (name_idx < fx->name_cnt)
755 msg (SE, _("There aren't enough format specifications "
756 "to match the number of variable names given."));
763 /* Displays a table giving information on fixed-format variable
764 parsing on DATA LIST. */
765 /* FIXME: The `Columns' column should be divided into three columns,
766 one for the starting column, one for the dash, one for the ending
767 column; then right-justify the starting column and left-justify the
770 dump_fixed_table (const struct dls_var_spec *specs,
771 const struct file_handle *fh, int rec_cnt)
773 const struct dls_var_spec *spec;
777 for (i = 0, spec = specs; spec; spec = spec->next)
779 t = tab_create (4, i + 1, 0);
780 tab_columns (t, TAB_COL_DOWN, 1);
781 tab_headers (t, 0, 0, 1, 0);
782 tab_text (t, 0, 0, TAB_CENTER | TAT_TITLE, _("Variable"));
783 tab_text (t, 1, 0, TAB_CENTER | TAT_TITLE, _("Record"));
784 tab_text (t, 2, 0, TAB_CENTER | TAT_TITLE, _("Columns"));
785 tab_text (t, 3, 0, TAB_CENTER | TAT_TITLE, _("Format"));
786 tab_box (t, TAL_1, TAL_1, TAL_0, TAL_1, 0, 0, 3, i);
787 tab_hline (t, TAL_2, 0, 3, 1);
788 tab_dim (t, tab_natural_dimensions);
790 for (i = 1, spec = specs; spec; spec = spec->next, i++)
792 tab_text (t, 0, i, TAB_LEFT, spec->v->name);
793 tab_text (t, 1, i, TAT_PRINTF, "%d", spec->rec);
794 tab_text (t, 2, i, TAT_PRINTF, "%3d-%3d",
796 tab_text (t, 3, i, TAB_LEFT | TAB_FIX,
797 fmt_to_string (&spec->input));
800 tab_title (t, ngettext ("Reading %d record from %s.",
801 "Reading %d records from %s.", rec_cnt),
802 rec_cnt, fh_get_name (fh));
806 /* Free-format parsing. */
808 /* Parses variable specifications for DATA LIST FREE and adds
809 them to the linked list with head FIRST and tail LAST.
810 Returns nonzero only if successful. */
812 parse_free (struct dls_var_spec **first, struct dls_var_spec **last)
817 struct fmt_spec input, output;
823 if (!parse_DATA_LIST_vars (&name, &name_cnt, PV_NONE))
828 if (!parse_format_specifier (&input, 0)
829 || !check_input_specifier (&input, 1)
830 || !lex_force_match (')'))
832 for (i = 0; i < name_cnt; i++)
837 convert_fmt_ItoO (&input, &output);
842 input = make_input_format (FMT_F, 8, 0);
843 output = *get_format ();
846 if (input.type == FMT_A || input.type == FMT_AHEX)
850 for (i = 0; i < name_cnt; i++)
852 struct dls_var_spec *spec;
855 v = dict_create_var (default_dict, name[i], width);
859 msg (SE, _("%s is a duplicate variable name."), name[i]);
862 v->print = v->write = output;
864 if (!case_source_is_complex (vfm_source))
867 spec = xmalloc (sizeof *spec);
871 str_copy_trunc (spec->name, sizeof spec->name, v->name);
872 append_var_spec (first, last, spec);
874 for (i = 0; i < name_cnt; i++)
879 return lex_end_of_command () == CMD_SUCCESS;
882 /* Displays a table giving information on free-format variable parsing
885 dump_free_table (const struct data_list_pgm *dls,
886 const struct file_handle *fh)
892 struct dls_var_spec *spec;
893 for (i = 0, spec = dls->first; spec; spec = spec->next)
897 t = tab_create (2, i + 1, 0);
898 tab_columns (t, TAB_COL_DOWN, 1);
899 tab_headers (t, 0, 0, 1, 0);
900 tab_text (t, 0, 0, TAB_CENTER | TAT_TITLE, _("Variable"));
901 tab_text (t, 1, 0, TAB_CENTER | TAT_TITLE, _("Format"));
902 tab_box (t, TAL_1, TAL_1, TAL_0, TAL_1, 0, 0, 1, i);
903 tab_hline (t, TAL_2, 0, 1, 1);
904 tab_dim (t, tab_natural_dimensions);
907 struct dls_var_spec *spec;
909 for (i = 1, spec = dls->first; spec; spec = spec->next, i++)
911 tab_text (t, 0, i, TAB_LEFT, spec->v->name);
912 tab_text (t, 1, i, TAB_LEFT | TAB_FIX, fmt_to_string (&spec->input));
916 tab_title (t, _("Reading free-form data from %s."), fh_get_name (fh));
921 /* Input procedure. */
923 /* Extracts a field from the current position in the current
924 record. Fields can be unquoted or quoted with single- or
925 double-quote characters. *FIELD is set to the field content.
926 After parsing the field, sets the current position in the
927 record to just past the field and any trailing delimiter.
928 END_BLANK is used internally; it should be initialized by the
929 caller to 0 and left alone afterward. Returns 0 on failure or
930 a 1-based column number indicating the beginning of the field
933 cut_field (const struct data_list_pgm *dls, struct fixed_string *field,
936 struct fixed_string line;
940 if (dfm_eof (dls->reader))
942 if (dls->delim_cnt == 0)
943 dfm_expand_tabs (dls->reader);
944 dfm_get_record (dls->reader, &line);
946 cp = ls_c_str (&line);
947 if (dls->delim_cnt == 0)
949 /* Skip leading whitespace. */
950 while (cp < ls_end (&line) && isspace ((unsigned char) *cp))
952 if (cp >= ls_end (&line))
955 /* Handle actual data, whether quoted or unquoted. */
956 if (*cp == '\'' || *cp == '"')
960 field->string = ++cp;
961 while (cp < ls_end (&line) && *cp != quote)
963 field->length = cp - field->string;
964 if (cp < ls_end (&line))
967 msg (SW, _("Quoted string missing terminating `%c'."), quote);
972 while (cp < ls_end (&line)
973 && !isspace ((unsigned char) *cp) && *cp != ',')
975 field->length = cp - field->string;
978 /* Skip trailing whitespace and a single comma if present. */
979 while (cp < ls_end (&line) && isspace ((unsigned char) *cp))
981 if (cp < ls_end (&line) && *cp == ',')
986 if (cp >= ls_end (&line))
988 int column = dfm_column_start (dls->reader);
989 /* A blank line or a line that ends in \t has a
990 trailing blank field. */
991 if (column == 1 || (column > 1 && cp[-1] == '\t'))
996 field->string = ls_end (&line);
998 dfm_forward_record (dls->reader);
1013 while (cp < ls_end (&line)
1014 && memchr (dls->delims, *cp, dls->delim_cnt) == NULL)
1016 field->length = cp - field->string;
1017 if (cp < ls_end (&line))
1022 dfm_forward_columns (dls->reader, field->string - line.string);
1023 column_start = dfm_column_start (dls->reader);
1025 dfm_forward_columns (dls->reader, cp - field->string);
1027 return column_start;
1030 static bool read_from_data_list_fixed (const struct data_list_pgm *,
1032 static bool read_from_data_list_free (const struct data_list_pgm *,
1034 static bool read_from_data_list_list (const struct data_list_pgm *,
1037 /* Reads a case from DLS into C.
1038 Returns true if successful, false at end of file or on I/O error. */
1040 read_from_data_list (const struct data_list_pgm *dls, struct ccase *c)
1044 dfm_push (dls->reader);
1048 retval = read_from_data_list_fixed (dls, c);
1051 retval = read_from_data_list_free (dls, c);
1054 retval = read_from_data_list_list (dls, c);
1059 dfm_pop (dls->reader);
1064 /* Reads a case from the data file into C, parsing it according
1065 to fixed-format syntax rules in DLS.
1066 Returns true if successful, false at end of file or on I/O error. */
1068 read_from_data_list_fixed (const struct data_list_pgm *dls, struct ccase *c)
1070 struct dls_var_spec *var_spec = dls->first;
1073 if (dfm_eof (dls->reader))
1075 for (i = 1; i <= dls->rec_cnt; i++)
1077 struct fixed_string line;
1079 if (dfm_eof (dls->reader))
1081 /* Note that this can't occur on the first record. */
1082 msg (SW, _("Partial case of %d of %d records discarded."),
1083 i - 1, dls->rec_cnt);
1086 dfm_expand_tabs (dls->reader);
1087 dfm_get_record (dls->reader, &line);
1089 for (; var_spec && i == var_spec->rec; var_spec = var_spec->next)
1093 data_in_finite_line (&di, ls_c_str (&line), ls_length (&line),
1094 var_spec->fc, var_spec->lc);
1095 di.v = case_data_rw (c, var_spec->fv);
1096 di.flags = DI_IMPLIED_DECIMALS;
1097 di.f1 = var_spec->fc;
1098 di.format = var_spec->input;
1103 dfm_forward_record (dls->reader);
1109 /* Reads a case from the data file into C, parsing it according
1110 to free-format syntax rules in DLS.
1111 Returns true if successful, false at end of file or on I/O error. */
1113 read_from_data_list_free (const struct data_list_pgm *dls, struct ccase *c)
1115 struct dls_var_spec *var_spec;
1118 for (var_spec = dls->first; var_spec; var_spec = var_spec->next)
1120 struct fixed_string field;
1123 /* Cut out a field and read in a new record if necessary. */
1126 column = cut_field (dls, &field, &end_blank);
1130 if (!dfm_eof (dls->reader))
1131 dfm_forward_record (dls->reader);
1132 if (dfm_eof (dls->reader))
1134 if (var_spec != dls->first)
1135 msg (SW, _("Partial case discarded. The first variable "
1136 "missing was %s."), var_spec->name);
1144 di.s = ls_c_str (&field);
1145 di.e = ls_end (&field);
1146 di.v = case_data_rw (c, var_spec->fv);
1149 di.format = var_spec->input;
1156 /* Reads a case from the data file and parses it according to
1157 list-format syntax rules.
1158 Returns true if successful, false at end of file or on I/O error. */
1160 read_from_data_list_list (const struct data_list_pgm *dls, struct ccase *c)
1162 struct dls_var_spec *var_spec;
1165 if (dfm_eof (dls->reader))
1168 for (var_spec = dls->first; var_spec; var_spec = var_spec->next)
1170 struct fixed_string field;
1173 /* Cut out a field and check for end-of-line. */
1174 column = cut_field (dls, &field, &end_blank);
1177 if (get_undefined ())
1178 msg (SW, _("Missing value(s) for all variables from %s onward. "
1179 "These will be filled with the system-missing value "
1180 "or blanks, as appropriate."),
1182 for (; var_spec; var_spec = var_spec->next)
1184 int width = get_format_var_width (&var_spec->input);
1186 case_data_rw (c, var_spec->fv)->f = SYSMIS;
1188 memset (case_data_rw (c, var_spec->fv)->s, ' ', width);
1196 di.s = ls_c_str (&field);
1197 di.e = ls_end (&field);
1198 di.v = case_data_rw (c, var_spec->fv);
1201 di.format = var_spec->input;
1206 dfm_forward_record (dls->reader);
1210 /* Destroys SPEC. */
1212 destroy_dls_var_spec (struct dls_var_spec *spec)
1214 struct dls_var_spec *next;
1216 while (spec != NULL)
1224 /* Destroys DATA LIST transformation DLS.
1225 Returns true if successful, false if an I/O error occurred. */
1227 data_list_trns_free (void *dls_)
1229 struct data_list_pgm *dls = dls_;
1231 destroy_dls_var_spec (dls->first);
1232 dfm_close_reader (dls->reader);
1237 /* Handle DATA LIST transformation DLS, parsing data into C. */
1239 data_list_trns_proc (void *dls_, struct ccase *c, int case_num UNUSED)
1241 struct data_list_pgm *dls = dls_;
1244 if (read_from_data_list (dls, c))
1245 retval = TRNS_CONTINUE;
1246 else if (dfm_reader_error (dls->reader) || dfm_eof (dls->reader) > 1)
1248 /* An I/O error, or encountering end of file for a second
1249 time, should be escalated into a more serious error. */
1250 retval = TRNS_ERROR;
1253 retval = TRNS_DROP_CASE;
1255 /* If there was an END subcommand handle it. */
1256 if (dls->end != NULL)
1258 double *end = &case_data_rw (c, dls->end->fv)->f;
1259 if (retval == TRNS_DROP_CASE)
1262 retval = TRNS_CONTINUE;
1271 /* Reads all the records from the data file and passes them to
1273 Returns true if successful, false if an I/O error occurred. */
1275 data_list_source_read (struct case_source *source,
1277 write_case_func *write_case, write_case_data wc_data)
1279 struct data_list_pgm *dls = source->aux;
1285 if (!read_from_data_list (dls, c))
1286 return !dfm_reader_error (dls->reader);
1288 dfm_push (dls->reader);
1289 ok = write_case (wc_data);
1290 dfm_pop (dls->reader);
1296 /* Destroys the source's internal data. */
1298 data_list_source_destroy (struct case_source *source)
1300 data_list_trns_free (source->aux);
1303 static const struct case_source_class data_list_source_class =
1307 data_list_source_read,
1308 data_list_source_destroy,
1311 /* REPEATING DATA. */
1313 /* Represents a number or a variable. */
1314 struct rpd_num_or_var
1316 int num; /* Value, or 0. */
1317 struct variable *var; /* Variable, if number==0. */
1320 /* REPEATING DATA private data structure. */
1321 struct repeating_data_trns
1323 struct dls_var_spec *first, *last; /* Variable parsing specifications. */
1324 struct dfm_reader *reader; /* Input file, never NULL. */
1326 struct rpd_num_or_var starts_beg; /* STARTS=, before the dash. */
1327 struct rpd_num_or_var starts_end; /* STARTS=, after the dash. */
1328 struct rpd_num_or_var occurs; /* OCCURS= subcommand. */
1329 struct rpd_num_or_var length; /* LENGTH= subcommand. */
1330 struct rpd_num_or_var cont_beg; /* CONTINUED=, before the dash. */
1331 struct rpd_num_or_var cont_end; /* CONTINUED=, after the dash. */
1333 /* ID subcommand. */
1334 int id_beg, id_end; /* Beginning & end columns. */
1335 struct variable *id_var; /* DATA LIST variable. */
1336 struct fmt_spec id_spec; /* Input format spec. */
1337 union value *id_value; /* ID value. */
1339 write_case_func *write_case;
1340 write_case_data wc_data;
1343 static trns_free_func repeating_data_trns_free;
1344 static int parse_num_or_var (struct rpd_num_or_var *, const char *);
1345 static int parse_repeating_data (struct dls_var_spec **,
1346 struct dls_var_spec **);
1347 static void find_variable_input_spec (struct variable *v,
1348 struct fmt_spec *spec);
1350 int cmd_repeating_data (void);
1352 /* Parses the REPEATING DATA command. */
1354 cmd_repeating_data (void)
1356 struct repeating_data_trns *rpd;
1357 int table = 1; /* Print table? */
1358 bool saw_starts = false; /* Saw STARTS subcommand? */
1359 bool saw_occurs = false; /* Saw OCCURS subcommand? */
1360 bool saw_length = false; /* Saw LENGTH subcommand? */
1361 bool saw_continued = false; /* Saw CONTINUED subcommand? */
1362 bool saw_id = false; /* Saw ID subcommand? */
1363 struct file_handle *const fh = fh_get_default_handle ();
1365 assert (case_source_is_complex (vfm_source));
1367 rpd = xmalloc (sizeof *rpd);
1368 rpd->reader = dfm_open_reader (fh);
1369 rpd->first = rpd->last = NULL;
1370 rpd->starts_beg.num = 0;
1371 rpd->starts_beg.var = NULL;
1372 rpd->starts_end = rpd->occurs = rpd->length = rpd->cont_beg
1373 = rpd->cont_end = rpd->starts_beg;
1374 rpd->id_beg = rpd->id_end = 0;
1376 rpd->id_value = NULL;
1382 if (lex_match_id ("FILE"))
1384 struct file_handle *file;
1386 file = fh_parse (FH_REF_FILE | FH_REF_INLINE);
1391 msg (SE, _("REPEATING DATA must use the same file as its "
1392 "corresponding DATA LIST or FILE TYPE."));
1396 else if (lex_match_id ("STARTS"))
1401 msg (SE, _("%s subcommand given multiple times."),"STARTS");
1406 if (!parse_num_or_var (&rpd->starts_beg, "STARTS beginning column"))
1409 lex_negative_to_dash ();
1410 if (lex_match ('-'))
1412 if (!parse_num_or_var (&rpd->starts_end, "STARTS ending column"))
1415 /* Otherwise, rpd->starts_end is uninitialized. We
1416 will initialize it later from the record length
1417 of the file. We can't do so now because the
1418 file handle may not be specified yet. */
1421 if (rpd->starts_beg.num != 0 && rpd->starts_end.num != 0
1422 && rpd->starts_beg.num > rpd->starts_end.num)
1424 msg (SE, _("STARTS beginning column (%d) exceeds "
1425 "STARTS ending column (%d)."),
1426 rpd->starts_beg.num, rpd->starts_end.num);
1430 else if (lex_match_id ("OCCURS"))
1435 msg (SE, _("%s subcommand given multiple times."),"OCCURS");
1440 if (!parse_num_or_var (&rpd->occurs, "OCCURS"))
1443 else if (lex_match_id ("LENGTH"))
1448 msg (SE, _("%s subcommand given multiple times."),"LENGTH");
1453 if (!parse_num_or_var (&rpd->length, "LENGTH"))
1456 else if (lex_match_id ("CONTINUED"))
1461 msg (SE, _("%s subcommand given multiple times."),"CONTINUED");
1464 saw_continued = true;
1466 if (!lex_match ('/'))
1468 if (!parse_num_or_var (&rpd->cont_beg,
1469 "CONTINUED beginning column"))
1472 lex_negative_to_dash ();
1474 && !parse_num_or_var (&rpd->cont_end,
1475 "CONTINUED ending column"))
1478 if (rpd->cont_beg.num != 0 && rpd->cont_end.num != 0
1479 && rpd->cont_beg.num > rpd->cont_end.num)
1481 msg (SE, _("CONTINUED beginning column (%d) exceeds "
1482 "CONTINUED ending column (%d)."),
1483 rpd->cont_beg.num, rpd->cont_end.num);
1488 rpd->cont_beg.num = 1;
1490 else if (lex_match_id ("ID"))
1495 msg (SE, _("%s subcommand given multiple times."),"ID");
1500 if (!lex_force_int ())
1502 if (lex_integer () < 1)
1504 msg (SE, _("ID beginning column (%ld) must be positive."),
1508 rpd->id_beg = lex_integer ();
1511 lex_negative_to_dash ();
1513 if (lex_match ('-'))
1515 if (!lex_force_int ())
1517 if (lex_integer () < 1)
1519 msg (SE, _("ID ending column (%ld) must be positive."),
1523 if (lex_integer () < rpd->id_end)
1525 msg (SE, _("ID ending column (%ld) cannot be less than "
1526 "ID beginning column (%d)."),
1527 lex_integer (), rpd->id_beg);
1531 rpd->id_end = lex_integer ();
1534 else rpd->id_end = rpd->id_beg;
1536 if (!lex_force_match ('='))
1538 rpd->id_var = parse_variable ();
1539 if (rpd->id_var == NULL)
1542 find_variable_input_spec (rpd->id_var, &rpd->id_spec);
1543 rpd->id_value = xnmalloc (rpd->id_var->nv, sizeof *rpd->id_value);
1545 else if (lex_match_id ("TABLE"))
1547 else if (lex_match_id ("NOTABLE"))
1549 else if (lex_match_id ("DATA"))
1557 if (!lex_force_match ('/'))
1561 /* Comes here when DATA specification encountered. */
1562 if (!saw_starts || !saw_occurs)
1565 msg (SE, _("Missing required specification STARTS."));
1567 msg (SE, _("Missing required specification OCCURS."));
1571 /* Enforce ID restriction. */
1572 if (saw_id && !saw_continued)
1574 msg (SE, _("ID specified without CONTINUED."));
1578 /* Calculate and check starts_end, cont_end if necessary. */
1579 if (rpd->starts_end.num == 0 && rpd->starts_end.var == NULL)
1581 rpd->starts_end.num = fh_get_record_width (fh);
1582 if (rpd->starts_beg.num != 0
1583 && rpd->starts_beg.num > rpd->starts_end.num)
1585 msg (SE, _("STARTS beginning column (%d) exceeds "
1586 "default STARTS ending column taken from file's "
1587 "record width (%d)."),
1588 rpd->starts_beg.num, rpd->starts_end.num);
1592 if (rpd->cont_end.num == 0 && rpd->cont_end.var == NULL)
1594 rpd->cont_end.num = fh_get_record_width (fh);
1595 if (rpd->cont_beg.num != 0
1596 && rpd->cont_beg.num > rpd->cont_end.num)
1598 msg (SE, _("CONTINUED beginning column (%d) exceeds "
1599 "default CONTINUED ending column taken from file's "
1600 "record width (%d)."),
1601 rpd->cont_beg.num, rpd->cont_end.num);
1607 if (!parse_repeating_data (&rpd->first, &rpd->last))
1610 /* Calculate length if necessary. */
1613 struct dls_var_spec *iter;
1615 for (iter = rpd->first; iter; iter = iter->next)
1616 if (iter->lc > rpd->length.num)
1617 rpd->length.num = iter->lc;
1618 assert (rpd->length.num != 0);
1622 dump_fixed_table (rpd->first, fh, rpd->last->rec);
1624 add_transformation (repeating_data_trns_proc, repeating_data_trns_free, rpd);
1626 return lex_end_of_command ();
1629 repeating_data_trns_free (rpd);
1630 return CMD_CASCADING_FAILURE;
1633 /* Finds the input format specification for variable V and puts
1634 it in SPEC. Because of the way that DATA LIST is structured,
1635 this is nontrivial. */
1637 find_variable_input_spec (struct variable *v, struct fmt_spec *spec)
1641 for (i = 0; i < n_trns; i++)
1643 struct transformation *trns = &t_trns[i];
1645 if (trns->proc == data_list_trns_proc)
1647 struct data_list_pgm *pgm = trns->private;
1648 struct dls_var_spec *iter;
1650 for (iter = pgm->first; iter; iter = iter->next)
1653 *spec = iter->input;
1662 /* Parses a number or a variable name from the syntax file and puts
1663 the results in VALUE. Ensures that the number is at least 1; else
1664 emits an error based on MESSAGE. Returns nonzero only if
1667 parse_num_or_var (struct rpd_num_or_var *value, const char *message)
1672 value->var = parse_variable ();
1673 if (value->var == NULL)
1675 if (value->var->type == ALPHA)
1677 msg (SE, _("String variable not allowed here."));
1681 else if (lex_is_integer ())
1683 value->num = lex_integer ();
1687 msg (SE, _("%s (%d) must be at least 1."), message, value->num);
1693 msg (SE, _("Variable or integer expected for %s."), message);
1699 /* Parses data specifications for repeating data groups, adding
1700 them to the linked list with head FIRST and tail LAST.
1701 Returns nonzero only if successful. */
1703 parse_repeating_data (struct dls_var_spec **first, struct dls_var_spec **last)
1705 struct fixed_parsing_state fx;
1711 while (token != '.')
1713 if (!parse_DATA_LIST_vars (&fx.name, &fx.name_cnt, PV_NONE))
1716 if (lex_is_number ())
1718 if (!fixed_parse_compatible (&fx, first, last))
1721 else if (token == '(')
1723 if (!fixed_parse_fortran (&fx, first, last))
1728 msg (SE, _("SPSS-like or FORTRAN-like format "
1729 "specification expected after variable names."));
1733 for (i = 0; i < fx.name_cnt; i++)
1741 for (i = 0; i < fx.name_cnt; i++)
1747 /* Obtains the real value for rpd_num_or_var N in case C and returns
1748 it. The valid range is nonnegative numbers, but numbers outside
1749 this range can be returned and should be handled by the caller as
1752 realize_value (struct rpd_num_or_var *n, struct ccase *c)
1756 double v = case_num (c, n->var->fv);
1757 return v != SYSMIS && v >= INT_MIN && v <= INT_MAX ? v : -1;
1763 /* Parameter record passed to rpd_parse_record(). */
1764 struct rpd_parse_info
1766 struct repeating_data_trns *trns; /* REPEATING DATA transformation. */
1767 const char *line; /* Line being parsed. */
1768 size_t len; /* Line length. */
1769 int beg, end; /* First and last column of first occurrence. */
1770 int ofs; /* Column offset between repeated occurrences. */
1771 struct ccase *c; /* Case to fill in. */
1772 int verify_id; /* Zero to initialize ID, nonzero to verify it. */
1773 int max_occurs; /* Max number of occurrences to parse. */
1776 /* Parses one record of repeated data and outputs corresponding
1777 cases. Returns number of occurrences parsed up to the
1778 maximum specified in INFO. */
1780 rpd_parse_record (const struct rpd_parse_info *info)
1782 struct repeating_data_trns *t = info->trns;
1783 int cur = info->beg;
1786 /* Handle record ID values. */
1789 union value id_temp[MAX_ELEMS_PER_VALUE];
1791 /* Parse record ID into V. */
1795 data_in_finite_line (&di, info->line, info->len, t->id_beg, t->id_end);
1796 di.v = info->verify_id ? id_temp : t->id_value;
1799 di.format = t->id_spec;
1806 && compare_values (id_temp, t->id_value, t->id_var->width) != 0)
1808 char expected_str [MAX_FORMATTED_LEN + 1];
1809 char actual_str [MAX_FORMATTED_LEN + 1];
1811 data_out (expected_str, &t->id_var->print, t->id_value);
1812 expected_str[t->id_var->print.w] = '\0';
1814 data_out (actual_str, &t->id_var->print, id_temp);
1815 actual_str[t->id_var->print.w] = '\0';
1818 _("Encountered mismatched record ID \"%s\" expecting \"%s\"."),
1819 actual_str, expected_str);
1825 /* Iterate over the set of expected occurrences and record each of
1826 them as a separate case. FIXME: We need to execute any
1827 transformations that follow the current one. */
1831 for (occurrences = 0; occurrences < info->max_occurs; )
1833 if (cur + info->ofs > info->end + 1)
1838 struct dls_var_spec *var_spec = t->first;
1840 for (; var_spec; var_spec = var_spec->next)
1842 int fc = var_spec->fc - 1 + cur;
1843 int lc = var_spec->lc - 1 + cur;
1845 if (fc > info->len && !warned && var_spec->input.type != FMT_A)
1850 _("Variable %s starting in column %d extends "
1851 "beyond physical record length of %d."),
1852 var_spec->v->name, fc, info->len);
1858 data_in_finite_line (&di, info->line, info->len, fc, lc);
1859 di.v = case_data_rw (info->c, var_spec->fv);
1862 di.format = var_spec->input;
1872 if (!t->write_case (t->wc_data))
1880 /* Reads one set of repetitions of the elements in the REPEATING
1881 DATA structure. Returns TRNS_CONTINUE on success,
1882 TRNS_DROP_CASE on end of file or on failure, or TRNS_NEXT_CASE. */
1884 repeating_data_trns_proc (void *trns_, struct ccase *c, int case_num UNUSED)
1886 struct repeating_data_trns *t = trns_;
1888 struct fixed_string line; /* Current record. */
1890 int starts_beg; /* Starting column. */
1891 int starts_end; /* Ending column. */
1892 int occurs; /* Number of repetitions. */
1893 int length; /* Length of each occurrence. */
1894 int cont_beg; /* Starting column for continuation lines. */
1895 int cont_end; /* Ending column for continuation lines. */
1897 int occurs_left; /* Number of occurrences remaining. */
1899 int code; /* Return value from rpd_parse_record(). */
1901 int skip_first_record = 0;
1903 dfm_push (t->reader);
1905 /* Read the current record. */
1906 dfm_reread_record (t->reader, 1);
1907 dfm_expand_tabs (t->reader);
1908 if (dfm_eof (t->reader))
1909 return TRNS_DROP_CASE;
1910 dfm_get_record (t->reader, &line);
1911 dfm_forward_record (t->reader);
1913 /* Calculate occurs, length. */
1914 occurs_left = occurs = realize_value (&t->occurs, c);
1917 tmsg (SE, RPD_ERR, _("Invalid value %d for OCCURS."), occurs);
1918 return TRNS_NEXT_CASE;
1920 starts_beg = realize_value (&t->starts_beg, c);
1921 if (starts_beg <= 0)
1923 tmsg (SE, RPD_ERR, _("Beginning column for STARTS (%d) must be "
1926 return TRNS_NEXT_CASE;
1928 starts_end = realize_value (&t->starts_end, c);
1929 if (starts_end < starts_beg)
1931 tmsg (SE, RPD_ERR, _("Ending column for STARTS (%d) is less than "
1932 "beginning column (%d)."),
1933 starts_end, starts_beg);
1934 skip_first_record = 1;
1936 length = realize_value (&t->length, c);
1939 tmsg (SE, RPD_ERR, _("Invalid value %d for LENGTH."), length);
1941 occurs = occurs_left = 1;
1943 cont_beg = realize_value (&t->cont_beg, c);
1946 tmsg (SE, RPD_ERR, _("Beginning column for CONTINUED (%d) must be "
1949 return TRNS_DROP_CASE;
1951 cont_end = realize_value (&t->cont_end, c);
1952 if (cont_end < cont_beg)
1954 tmsg (SE, RPD_ERR, _("Ending column for CONTINUED (%d) is less than "
1955 "beginning column (%d)."),
1956 cont_end, cont_beg);
1957 return TRNS_DROP_CASE;
1960 /* Parse the first record. */
1961 if (!skip_first_record)
1963 struct rpd_parse_info info;
1965 info.line = ls_c_str (&line);
1966 info.len = ls_length (&line);
1967 info.beg = starts_beg;
1968 info.end = starts_end;
1972 info.max_occurs = occurs_left;
1973 code = rpd_parse_record (&info);
1975 return TRNS_DROP_CASE;
1976 occurs_left -= code;
1978 else if (cont_beg == 0)
1979 return TRNS_NEXT_CASE;
1981 /* Make sure, if some occurrences are left, that we have
1982 continuation records. */
1983 if (occurs_left > 0 && cont_beg == 0)
1986 _("Number of repetitions specified on OCCURS (%d) "
1987 "exceed number of repetitions available in "
1988 "space on STARTS (%d), and CONTINUED not specified."),
1989 occurs, (starts_end - starts_beg + 1) / length);
1990 return TRNS_DROP_CASE;
1993 /* Go on to additional records. */
1994 while (occurs_left != 0)
1996 struct rpd_parse_info info;
1998 assert (occurs_left >= 0);
2000 /* Read in another record. */
2001 if (dfm_eof (t->reader))
2004 _("Unexpected end of file with %d repetitions "
2005 "remaining out of %d."),
2006 occurs_left, occurs);
2007 return TRNS_DROP_CASE;
2009 dfm_expand_tabs (t->reader);
2010 dfm_get_record (t->reader, &line);
2011 dfm_forward_record (t->reader);
2013 /* Parse this record. */
2015 info.line = ls_c_str (&line);
2016 info.len = ls_length (&line);
2017 info.beg = cont_beg;
2018 info.end = cont_end;
2022 info.max_occurs = occurs_left;
2023 code = rpd_parse_record (&info);;
2025 return TRNS_DROP_CASE;
2026 occurs_left -= code;
2029 dfm_pop (t->reader);
2031 /* FIXME: This is a kluge until we've implemented multiplexing of
2033 return TRNS_NEXT_CASE;
2036 /* Frees a REPEATING DATA transformation.
2037 Returns true if successful, false if an I/O error occurred. */
2039 repeating_data_trns_free (void *rpd_)
2041 struct repeating_data_trns *rpd = rpd_;
2043 destroy_dls_var_spec (rpd->first);
2044 dfm_close_reader (rpd->reader);
2045 free (rpd->id_value);
2050 /* Lets repeating_data_trns_proc() know how to write the cases
2051 that it composes. Not elegant. */
2053 repeating_data_set_write_case (struct transformation *trns_,
2054 write_case_func *write_case,
2055 write_case_data wc_data)
2057 struct repeating_data_trns *t = trns_->private;
2059 assert (trns_->proc == repeating_data_trns_proc);
2060 t->write_case = write_case;
2061 t->wc_data = wc_data;