1 /* PSPP - computes sample statistics.
2 Copyright (C) 1997-9, 2000, 2006 Free Software Foundation, Inc.
3 Written by Ben Pfaff <blp@gnu.org>.
5 This program is free software; you can redistribute it and/or
6 modify it under the terms of the GNU General Public License as
7 published by the Free Software Foundation; either version 2 of the
8 License, or (at your option) any later version.
10 This program is distributed in the hope that it will be useful, but
11 WITHOUT ANY WARRANTY; without even the implied warranty of
12 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
13 General Public License for more details.
15 You should have received a copy of the GNU General Public License
16 along with this program; if not, write to the Free Software
17 Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
21 #include "data-list.h"
31 #include "debug-print.h"
32 #include "data-reader.h"
33 #include "dictionary.h"
35 #include "file-handle.h"
43 #include "procedure.h"
46 #define _(msgid) gettext (msgid)
48 /* Utility function. */
50 /* FIXME: Either REPEATING DATA must be the last transformation, or we
51 must multiplex the transformations that follow (i.e., perform them
52 for every case that we produce from a repetition instance).
53 Currently we do neither. We should do one or the other. */
55 /* Describes how to parse one variable. */
58 struct dls_var_spec *next; /* Next specification in list. */
60 /* Both free and fixed formats. */
61 struct fmt_spec input; /* Input format of this field. */
62 struct variable *v; /* Associated variable. Used only in
63 parsing. Not safe later. */
64 int fv; /* First value in case. */
66 /* Fixed format only. */
67 int rec; /* Record number (1-based). */
68 int fc, lc; /* Column numbers in record. */
70 /* Free format only. */
71 char name[LONG_NAME_LEN + 1]; /* Name of variable. */
74 /* Constants for DATA LIST type. */
75 /* Must match table in cmd_data_list(). */
83 /* DATA LIST private data structure. */
86 struct dls_var_spec *first, *last; /* Variable parsing specifications. */
87 struct dfm_reader *reader; /* Data file reader. */
89 int type; /* A DLS_* constant. */
90 struct variable *end; /* Variable specified on END subcommand. */
91 int rec_cnt; /* Number of records. */
92 size_t case_size; /* Case size in bytes. */
93 char *delims; /* Delimiters if any; not null-terminated. */
94 size_t delim_cnt; /* Number of delimiter, or 0 for spaces. */
97 static const struct case_source_class data_list_source_class;
99 static int parse_fixed (struct data_list_pgm *);
100 static int parse_free (struct dls_var_spec **, struct dls_var_spec **);
101 static void dump_fixed_table (const struct dls_var_spec *,
102 const struct file_handle *, int rec_cnt);
103 static void dump_free_table (const struct data_list_pgm *,
104 const struct file_handle *);
105 static void destroy_dls_var_spec (struct dls_var_spec *);
106 static trns_free_func data_list_trns_free;
107 static trns_proc_func data_list_trns_proc;
109 /* Message title for REPEATING DATA. */
110 #define RPD_ERR "REPEATING DATA: "
115 struct data_list_pgm *dls;
116 int table = -1; /* Print table if nonzero, -1=undecided. */
117 struct file_handle *fh = fh_inline_file ();
119 if (!case_source_is_complex (vfm_source))
120 discard_variables ();
122 dls = xmalloc (sizeof *dls);
129 dls->first = dls->last = NULL;
133 if (lex_match_id ("FILE"))
136 fh = fh_parse (FH_REF_FILE | FH_REF_INLINE);
139 if (case_source_is_class (vfm_source, &file_type_source_class)
140 && fh != fh_get_default_handle ())
142 msg (SE, _("DATA LIST must use the same file "
143 "as the enclosing FILE TYPE."));
147 else if (lex_match_id ("RECORDS"))
151 if (!lex_force_int ())
153 dls->rec_cnt = lex_integer ();
157 else if (lex_match_id ("END"))
161 msg (SE, _("The END subcommand may only be specified once."));
166 if (!lex_force_id ())
168 dls->end = dict_lookup_var (default_dict, tokid);
170 dls->end = dict_create_var_assert (default_dict, tokid, 0);
173 else if (token == T_ID)
175 if (lex_match_id ("NOTABLE"))
177 else if (lex_match_id ("TABLE"))
182 if (lex_match_id ("FIXED"))
184 else if (lex_match_id ("FREE"))
186 else if (lex_match_id ("LIST"))
196 msg (SE, _("Only one of FIXED, FREE, or LIST may "
202 if ((dls->type == DLS_FREE || dls->type == DLS_LIST)
205 while (!lex_match (')'))
209 if (lex_match_id ("TAB"))
211 else if (token == T_STRING && tokstr.length == 1)
213 delim = tokstr.string[0];
222 dls->delims = xrealloc (dls->delims, dls->delim_cnt + 1);
223 dls->delims[dls->delim_cnt++] = delim;
237 dls->case_size = dict_get_case_size (default_dict);
238 fh_set_default_handle (fh);
241 dls->type = DLS_FIXED;
245 if (dls->type == DLS_FREE)
251 if (dls->type == DLS_FIXED)
253 if (!parse_fixed (dls))
256 dump_fixed_table (dls->first, fh, dls->rec_cnt);
260 if (!parse_free (&dls->first, &dls->last))
263 dump_free_table (dls, fh);
266 dls->reader = dfm_open_reader (fh);
267 if (dls->reader == NULL)
270 if (vfm_source != NULL)
271 add_transformation (data_list_trns_proc, data_list_trns_free, dls);
273 vfm_source = create_case_source (&data_list_source_class, dls);
278 data_list_trns_free (dls);
279 return CMD_CASCADING_FAILURE;
282 /* Adds SPEC to the linked list with head at FIRST and tail at
285 append_var_spec (struct dls_var_spec **first, struct dls_var_spec **last,
286 struct dls_var_spec *spec)
293 (*last)->next = spec;
297 /* Fixed-format parsing. */
299 /* Used for chaining together fortran-like format specifiers. */
302 struct fmt_list *next;
305 struct fmt_list *down;
308 /* State of parsing DATA LIST. */
309 struct fixed_parsing_state
311 char **name; /* Variable names. */
312 size_t name_cnt; /* Number of names. */
314 int recno; /* Index of current record. */
315 int sc; /* 1-based column number of starting column for
316 next field to output. */
319 static int fixed_parse_compatible (struct fixed_parsing_state *,
320 struct dls_var_spec **,
321 struct dls_var_spec **);
322 static int fixed_parse_fortran (struct fixed_parsing_state *,
323 struct dls_var_spec **,
324 struct dls_var_spec **);
326 /* Parses all the variable specifications for DATA LIST FIXED,
327 storing them into DLS. Returns nonzero if successful. */
329 parse_fixed (struct data_list_pgm *dls)
331 struct fixed_parsing_state fx;
339 while (lex_match ('/'))
342 if (lex_is_integer ())
344 if (lex_integer () < fx.recno)
346 msg (SE, _("The record number specified, %ld, is "
347 "before the previous record, %d. Data "
348 "fields must be listed in order of "
349 "increasing record number."),
350 lex_integer (), fx.recno - 1);
354 fx.recno = lex_integer ();
360 if (!parse_DATA_LIST_vars (&fx.name, &fx.name_cnt, PV_NONE))
363 if (lex_is_number ())
365 if (!fixed_parse_compatible (&fx, &dls->first, &dls->last))
368 else if (token == '(')
370 if (!fixed_parse_fortran (&fx, &dls->first, &dls->last))
375 msg (SE, _("SPSS-like or FORTRAN-like format "
376 "specification expected after variable names."));
380 for (i = 0; i < fx.name_cnt; i++)
384 if (dls->first == NULL)
386 msg (SE, _("At least one variable must be specified."));
389 if (dls->rec_cnt && dls->last->rec > dls->rec_cnt)
391 msg (SE, _("Variables are specified on records that "
392 "should not exist according to RECORDS subcommand."));
395 else if (!dls->rec_cnt)
396 dls->rec_cnt = dls->last->rec;
397 return lex_end_of_command () == CMD_SUCCESS;
400 for (i = 0; i < fx.name_cnt; i++)
406 /* Parses a variable specification in the form 1-10 (A) based on
407 FX and adds specifications to the linked list with head at
408 FIRST and tail at LAST. */
410 fixed_parse_compatible (struct fixed_parsing_state *fx,
411 struct dls_var_spec **first, struct dls_var_spec **last)
413 struct fmt_spec input;
419 if (!lex_force_int ())
424 msg (SE, _("Column positions for fields must be positive."));
430 lex_negative_to_dash ();
433 if (!lex_force_int ())
438 msg (SE, _("Column positions for fields must be positive."));
443 msg (SE, _("The ending column for a field must be "
444 "greater than the starting column."));
453 /* Divide columns evenly. */
454 input.w = (lc - fc + 1) / fx->name_cnt;
455 if ((lc - fc + 1) % fx->name_cnt)
457 msg (SE, _("The %d columns %d-%d "
458 "can't be evenly divided into %d fields."),
459 lc - fc + 1, fc, lc, fx->name_cnt);
463 /* Format specifier. */
466 struct fmt_desc *fdp;
472 input.type = parse_format_specifier_name (&cp, 0);
473 if (input.type == -1)
477 msg (SE, _("A format specifier on this line "
478 "has extra characters on the end."));
488 if (lex_is_integer ())
490 if (lex_integer () < 1)
492 msg (SE, _("The value for number of decimal places "
493 "must be at least 1."));
497 input.d = lex_integer ();
503 fdp = &formats[input.type];
504 if (fdp->n_args < 2 && input.d)
506 msg (SE, _("Input format %s doesn't accept decimal places."),
514 if (!lex_force_match (')'))
522 if (!check_input_specifier (&input, 1))
525 /* Start column for next specification. */
528 /* Width of variables to create. */
529 if (input.type == FMT_A || input.type == FMT_AHEX)
534 /* Create variables and var specs. */
535 for (i = 0; i < fx->name_cnt; i++)
537 struct dls_var_spec *spec;
540 v = dict_create_var (default_dict, fx->name[i], width);
543 convert_fmt_ItoO (&input, &v->print);
545 if (!case_source_is_complex (vfm_source))
550 v = dict_lookup_var_assert (default_dict, fx->name[i]);
551 if (vfm_source == NULL)
553 msg (SE, _("%s is a duplicate variable name."), fx->name[i]);
556 if ((width != 0) != (v->width != 0))
558 msg (SE, _("There is already a variable %s of a "
563 if (width != 0 && width != v->width)
565 msg (SE, _("There is already a string variable %s of a "
566 "different width."), fx->name[i]);
571 spec = xmalloc (sizeof *spec);
575 spec->rec = fx->recno;
576 spec->fc = fc + input.w * i;
577 spec->lc = spec->fc + input.w - 1;
578 append_var_spec (first, last, spec);
583 /* Destroy format list F and, if RECURSE is nonzero, all its
586 destroy_fmt_list (struct fmt_list *f, int recurse)
588 struct fmt_list *next;
593 if (recurse && f->f.type == FMT_DESCEND)
594 destroy_fmt_list (f->down, 1);
599 /* Takes a hierarchically structured fmt_list F as constructed by
600 fixed_parse_fortran(), and flattens it, adding the variable
601 specifications to the linked list with head FIRST and tail
602 LAST. NAME_IDX is used to take values from the list of names
603 in FX; it should initially point to a value of 0. */
605 dump_fmt_list (struct fixed_parsing_state *fx, struct fmt_list *f,
606 struct dls_var_spec **first, struct dls_var_spec **last,
611 for (; f; f = f->next)
612 if (f->f.type == FMT_X)
614 else if (f->f.type == FMT_T)
616 else if (f->f.type == FMT_NEWREC)
618 fx->recno += f->count;
622 for (i = 0; i < f->count; i++)
623 if (f->f.type == FMT_DESCEND)
625 if (!dump_fmt_list (fx, f->down, first, last, name_idx))
630 struct dls_var_spec *spec;
634 if (formats[f->f.type].cat & FCAT_STRING)
638 if (*name_idx >= fx->name_cnt)
640 msg (SE, _("The number of format "
641 "specifications exceeds the given number of "
646 v = dict_create_var (default_dict, fx->name[(*name_idx)++], width);
649 msg (SE, _("%s is a duplicate variable name."), fx->name[i]);
653 if (!case_source_is_complex (vfm_source))
656 spec = xmalloc (sizeof *spec);
660 spec->rec = fx->recno;
662 spec->lc = fx->sc + f->f.w - 1;
663 append_var_spec (first, last, spec);
665 convert_fmt_ItoO (&spec->input, &v->print);
673 /* Recursively parses a FORTRAN-like format specification into
674 the linked list with head FIRST and tail TAIL. LEVEL is the
675 level of recursion, starting from 0. Returns the parsed
676 specification if successful, or a null pointer on failure. */
677 static struct fmt_list *
678 fixed_parse_fortran_internal (struct fixed_parsing_state *fx,
679 struct dls_var_spec **first,
680 struct dls_var_spec **last)
682 struct fmt_list *head = NULL;
683 struct fmt_list *tail = NULL;
685 lex_force_match ('(');
689 struct fmt_list *new = xmalloc (sizeof *new);
692 /* Append new to list. */
700 if (lex_is_integer ())
702 new->count = lex_integer ();
708 /* Parse format specifier. */
711 new->f.type = FMT_DESCEND;
712 new->down = fixed_parse_fortran_internal (fx, first, last);
713 if (new->down == NULL)
716 else if (lex_match ('/'))
717 new->f.type = FMT_NEWREC;
718 else if (!parse_format_specifier (&new->f, FMTP_ALLOW_XT)
719 || !check_input_specifier (&new->f, 1))
724 lex_force_match (')');
729 destroy_fmt_list (head, 0);
734 /* Parses a FORTRAN-like format specification into the linked
735 list with head FIRST and tail LAST. Returns nonzero if
738 fixed_parse_fortran (struct fixed_parsing_state *fx,
739 struct dls_var_spec **first, struct dls_var_spec **last)
741 struct fmt_list *list;
744 list = fixed_parse_fortran_internal (fx, first, last);
749 dump_fmt_list (fx, list, first, last, &name_idx);
750 destroy_fmt_list (list, 1);
751 if (name_idx < fx->name_cnt)
753 msg (SE, _("There aren't enough format specifications "
754 "to match the number of variable names given."));
761 /* Displays a table giving information on fixed-format variable
762 parsing on DATA LIST. */
763 /* FIXME: The `Columns' column should be divided into three columns,
764 one for the starting column, one for the dash, one for the ending
765 column; then right-justify the starting column and left-justify the
768 dump_fixed_table (const struct dls_var_spec *specs,
769 const struct file_handle *fh, int rec_cnt)
771 const struct dls_var_spec *spec;
775 for (i = 0, spec = specs; spec; spec = spec->next)
777 t = tab_create (4, i + 1, 0);
778 tab_columns (t, TAB_COL_DOWN, 1);
779 tab_headers (t, 0, 0, 1, 0);
780 tab_text (t, 0, 0, TAB_CENTER | TAT_TITLE, _("Variable"));
781 tab_text (t, 1, 0, TAB_CENTER | TAT_TITLE, _("Record"));
782 tab_text (t, 2, 0, TAB_CENTER | TAT_TITLE, _("Columns"));
783 tab_text (t, 3, 0, TAB_CENTER | TAT_TITLE, _("Format"));
784 tab_box (t, TAL_1, TAL_1, TAL_0, TAL_1, 0, 0, 3, i);
785 tab_hline (t, TAL_2, 0, 3, 1);
786 tab_dim (t, tab_natural_dimensions);
788 for (i = 1, spec = specs; spec; spec = spec->next, i++)
790 tab_text (t, 0, i, TAB_LEFT, spec->v->name);
791 tab_text (t, 1, i, TAT_PRINTF, "%d", spec->rec);
792 tab_text (t, 2, i, TAT_PRINTF, "%3d-%3d",
794 tab_text (t, 3, i, TAB_LEFT | TAT_FIX,
795 fmt_to_string (&spec->input));
798 tab_title (t, 1, ngettext ("Reading %d record from %s.",
799 "Reading %d records from %s.", rec_cnt),
800 rec_cnt, fh_get_name (fh));
804 /* Free-format parsing. */
806 /* Parses variable specifications for DATA LIST FREE and adds
807 them to the linked list with head FIRST and tail LAST.
808 Returns nonzero only if successful. */
810 parse_free (struct dls_var_spec **first, struct dls_var_spec **last)
815 struct fmt_spec input, output;
821 if (!parse_DATA_LIST_vars (&name, &name_cnt, PV_NONE))
826 if (!parse_format_specifier (&input, 0)
827 || !check_input_specifier (&input, 1)
828 || !lex_force_match (')'))
830 for (i = 0; i < name_cnt; i++)
835 convert_fmt_ItoO (&input, &output);
840 input = make_input_format (FMT_F, 8, 0);
841 output = *get_format ();
844 if (input.type == FMT_A || input.type == FMT_AHEX)
848 for (i = 0; i < name_cnt; i++)
850 struct dls_var_spec *spec;
853 v = dict_create_var (default_dict, name[i], width);
857 msg (SE, _("%s is a duplicate variable name."), name[i]);
860 v->print = v->write = output;
862 if (!case_source_is_complex (vfm_source))
865 spec = xmalloc (sizeof *spec);
869 str_copy_trunc (spec->name, sizeof spec->name, v->name);
870 append_var_spec (first, last, spec);
872 for (i = 0; i < name_cnt; i++)
877 return lex_end_of_command () == CMD_SUCCESS;
880 /* Displays a table giving information on free-format variable parsing
883 dump_free_table (const struct data_list_pgm *dls,
884 const struct file_handle *fh)
890 struct dls_var_spec *spec;
891 for (i = 0, spec = dls->first; spec; spec = spec->next)
895 t = tab_create (2, i + 1, 0);
896 tab_columns (t, TAB_COL_DOWN, 1);
897 tab_headers (t, 0, 0, 1, 0);
898 tab_text (t, 0, 0, TAB_CENTER | TAT_TITLE, _("Variable"));
899 tab_text (t, 1, 0, TAB_CENTER | TAT_TITLE, _("Format"));
900 tab_box (t, TAL_1, TAL_1, TAL_0, TAL_1, 0, 0, 1, i);
901 tab_hline (t, TAL_2, 0, 1, 1);
902 tab_dim (t, tab_natural_dimensions);
905 struct dls_var_spec *spec;
907 for (i = 1, spec = dls->first; spec; spec = spec->next, i++)
909 tab_text (t, 0, i, TAB_LEFT, spec->v->name);
910 tab_text (t, 1, i, TAB_LEFT | TAT_FIX, fmt_to_string (&spec->input));
914 tab_title (t, 1, _("Reading free-form data from %s."), fh_get_name (fh));
919 /* Input procedure. */
921 /* Extracts a field from the current position in the current
922 record. Fields can be unquoted or quoted with single- or
923 double-quote characters. *FIELD is set to the field content.
924 After parsing the field, sets the current position in the
925 record to just past the field and any trailing delimiter.
926 END_BLANK is used internally; it should be initialized by the
927 caller to 0 and left alone afterward. Returns 0 on failure or
928 a 1-based column number indicating the beginning of the field
931 cut_field (const struct data_list_pgm *dls, struct fixed_string *field,
934 struct fixed_string line;
938 if (dfm_eof (dls->reader))
940 if (dls->delim_cnt == 0)
941 dfm_expand_tabs (dls->reader);
942 dfm_get_record (dls->reader, &line);
944 cp = ls_c_str (&line);
945 if (dls->delim_cnt == 0)
947 /* Skip leading whitespace. */
948 while (cp < ls_end (&line) && isspace ((unsigned char) *cp))
950 if (cp >= ls_end (&line))
953 /* Handle actual data, whether quoted or unquoted. */
954 if (*cp == '\'' || *cp == '"')
958 field->string = ++cp;
959 while (cp < ls_end (&line) && *cp != quote)
961 field->length = cp - field->string;
962 if (cp < ls_end (&line))
965 msg (SW, _("Quoted string missing terminating `%c'."), quote);
970 while (cp < ls_end (&line)
971 && !isspace ((unsigned char) *cp) && *cp != ',')
973 field->length = cp - field->string;
976 /* Skip trailing whitespace and a single comma if present. */
977 while (cp < ls_end (&line) && isspace ((unsigned char) *cp))
979 if (cp < ls_end (&line) && *cp == ',')
984 if (cp >= ls_end (&line))
986 int column = dfm_column_start (dls->reader);
987 /* A blank line or a line that ends in \t has a
988 trailing blank field. */
989 if (column == 1 || (column > 1 && cp[-1] == '\t'))
994 field->string = ls_end (&line);
996 dfm_forward_record (dls->reader);
1011 while (cp < ls_end (&line)
1012 && memchr (dls->delims, *cp, dls->delim_cnt) == NULL)
1014 field->length = cp - field->string;
1015 if (cp < ls_end (&line))
1020 dfm_forward_columns (dls->reader, field->string - line.string);
1021 column_start = dfm_column_start (dls->reader);
1023 dfm_forward_columns (dls->reader, cp - field->string);
1025 return column_start;
1028 static bool read_from_data_list_fixed (const struct data_list_pgm *,
1030 static bool read_from_data_list_free (const struct data_list_pgm *,
1032 static bool read_from_data_list_list (const struct data_list_pgm *,
1035 /* Reads a case from DLS into C.
1036 Returns true if successful, false at end of file or on I/O error. */
1038 read_from_data_list (const struct data_list_pgm *dls, struct ccase *c)
1042 dfm_push (dls->reader);
1046 retval = read_from_data_list_fixed (dls, c);
1049 retval = read_from_data_list_free (dls, c);
1052 retval = read_from_data_list_list (dls, c);
1057 dfm_pop (dls->reader);
1062 /* Reads a case from the data file into C, parsing it according
1063 to fixed-format syntax rules in DLS.
1064 Returns true if successful, false at end of file or on I/O error. */
1066 read_from_data_list_fixed (const struct data_list_pgm *dls, struct ccase *c)
1068 struct dls_var_spec *var_spec = dls->first;
1071 if (dfm_eof (dls->reader))
1073 for (i = 1; i <= dls->rec_cnt; i++)
1075 struct fixed_string line;
1077 if (dfm_eof (dls->reader))
1079 /* Note that this can't occur on the first record. */
1080 msg (SW, _("Partial case of %d of %d records discarded."),
1081 i - 1, dls->rec_cnt);
1084 dfm_expand_tabs (dls->reader);
1085 dfm_get_record (dls->reader, &line);
1087 for (; var_spec && i == var_spec->rec; var_spec = var_spec->next)
1091 data_in_finite_line (&di, ls_c_str (&line), ls_length (&line),
1092 var_spec->fc, var_spec->lc);
1093 di.v = case_data_rw (c, var_spec->fv);
1094 di.flags = DI_IMPLIED_DECIMALS;
1095 di.f1 = var_spec->fc;
1096 di.format = var_spec->input;
1101 dfm_forward_record (dls->reader);
1107 /* Reads a case from the data file into C, parsing it according
1108 to free-format syntax rules in DLS.
1109 Returns true if successful, false at end of file or on I/O error. */
1111 read_from_data_list_free (const struct data_list_pgm *dls, struct ccase *c)
1113 struct dls_var_spec *var_spec;
1116 for (var_spec = dls->first; var_spec; var_spec = var_spec->next)
1118 struct fixed_string field;
1121 /* Cut out a field and read in a new record if necessary. */
1124 column = cut_field (dls, &field, &end_blank);
1128 if (!dfm_eof (dls->reader))
1129 dfm_forward_record (dls->reader);
1130 if (dfm_eof (dls->reader))
1132 if (var_spec != dls->first)
1133 msg (SW, _("Partial case discarded. The first variable "
1134 "missing was %s."), var_spec->name);
1142 di.s = ls_c_str (&field);
1143 di.e = ls_end (&field);
1144 di.v = case_data_rw (c, var_spec->fv);
1147 di.format = var_spec->input;
1154 /* Reads a case from the data file and parses it according to
1155 list-format syntax rules.
1156 Returns true if successful, false at end of file or on I/O error. */
1158 read_from_data_list_list (const struct data_list_pgm *dls, struct ccase *c)
1160 struct dls_var_spec *var_spec;
1163 if (dfm_eof (dls->reader))
1166 for (var_spec = dls->first; var_spec; var_spec = var_spec->next)
1168 struct fixed_string field;
1171 /* Cut out a field and check for end-of-line. */
1172 column = cut_field (dls, &field, &end_blank);
1175 if (get_undefined ())
1176 msg (SW, _("Missing value(s) for all variables from %s onward. "
1177 "These will be filled with the system-missing value "
1178 "or blanks, as appropriate."),
1180 for (; var_spec; var_spec = var_spec->next)
1182 int width = get_format_var_width (&var_spec->input);
1184 case_data_rw (c, var_spec->fv)->f = SYSMIS;
1186 memset (case_data_rw (c, var_spec->fv)->s, ' ', width);
1194 di.s = ls_c_str (&field);
1195 di.e = ls_end (&field);
1196 di.v = case_data_rw (c, var_spec->fv);
1199 di.format = var_spec->input;
1204 dfm_forward_record (dls->reader);
1208 /* Destroys SPEC. */
1210 destroy_dls_var_spec (struct dls_var_spec *spec)
1212 struct dls_var_spec *next;
1214 while (spec != NULL)
1222 /* Destroys DATA LIST transformation DLS.
1223 Returns true if successful, false if an I/O error occurred. */
1225 data_list_trns_free (void *dls_)
1227 struct data_list_pgm *dls = dls_;
1229 destroy_dls_var_spec (dls->first);
1230 dfm_close_reader (dls->reader);
1235 /* Handle DATA LIST transformation DLS, parsing data into C. */
1237 data_list_trns_proc (void *dls_, struct ccase *c, int case_num UNUSED)
1239 struct data_list_pgm *dls = dls_;
1242 if (read_from_data_list (dls, c))
1243 retval = TRNS_CONTINUE;
1244 else if (dfm_reader_error (dls->reader) || dfm_eof (dls->reader) > 1)
1246 /* An I/O error, or encountering end of file for a second
1247 time, should be escalated into a more serious error. */
1248 retval = TRNS_ERROR;
1251 retval = TRNS_DROP_CASE;
1253 /* If there was an END subcommand handle it. */
1254 if (dls->end != NULL)
1256 double *end = &case_data_rw (c, dls->end->fv)->f;
1257 if (retval == TRNS_DROP_CASE)
1260 retval = TRNS_CONTINUE;
1269 /* Reads all the records from the data file and passes them to
1271 Returns true if successful, false if an I/O error occurred. */
1273 data_list_source_read (struct case_source *source,
1275 write_case_func *write_case, write_case_data wc_data)
1277 struct data_list_pgm *dls = source->aux;
1283 if (!read_from_data_list (dls, c))
1284 return !dfm_reader_error (dls->reader);
1286 dfm_push (dls->reader);
1287 ok = write_case (wc_data);
1288 dfm_pop (dls->reader);
1294 /* Destroys the source's internal data. */
1296 data_list_source_destroy (struct case_source *source)
1298 data_list_trns_free (source->aux);
1301 static const struct case_source_class data_list_source_class =
1305 data_list_source_read,
1306 data_list_source_destroy,
1309 /* REPEATING DATA. */
1311 /* Represents a number or a variable. */
1312 struct rpd_num_or_var
1314 int num; /* Value, or 0. */
1315 struct variable *var; /* Variable, if number==0. */
1318 /* REPEATING DATA private data structure. */
1319 struct repeating_data_trns
1321 struct dls_var_spec *first, *last; /* Variable parsing specifications. */
1322 struct dfm_reader *reader; /* Input file, never NULL. */
1324 struct rpd_num_or_var starts_beg; /* STARTS=, before the dash. */
1325 struct rpd_num_or_var starts_end; /* STARTS=, after the dash. */
1326 struct rpd_num_or_var occurs; /* OCCURS= subcommand. */
1327 struct rpd_num_or_var length; /* LENGTH= subcommand. */
1328 struct rpd_num_or_var cont_beg; /* CONTINUED=, before the dash. */
1329 struct rpd_num_or_var cont_end; /* CONTINUED=, after the dash. */
1331 /* ID subcommand. */
1332 int id_beg, id_end; /* Beginning & end columns. */
1333 struct variable *id_var; /* DATA LIST variable. */
1334 struct fmt_spec id_spec; /* Input format spec. */
1335 union value *id_value; /* ID value. */
1337 write_case_func *write_case;
1338 write_case_data wc_data;
1341 static trns_free_func repeating_data_trns_free;
1342 static int parse_num_or_var (struct rpd_num_or_var *, const char *);
1343 static int parse_repeating_data (struct dls_var_spec **,
1344 struct dls_var_spec **);
1345 static void find_variable_input_spec (struct variable *v,
1346 struct fmt_spec *spec);
1348 /* Parses the REPEATING DATA command. */
1350 cmd_repeating_data (void)
1352 struct repeating_data_trns *rpd;
1353 int table = 1; /* Print table? */
1354 bool saw_starts = false; /* Saw STARTS subcommand? */
1355 bool saw_occurs = false; /* Saw OCCURS subcommand? */
1356 bool saw_length = false; /* Saw LENGTH subcommand? */
1357 bool saw_continued = false; /* Saw CONTINUED subcommand? */
1358 bool saw_id = false; /* Saw ID subcommand? */
1359 struct file_handle *const fh = fh_get_default_handle ();
1361 assert (case_source_is_complex (vfm_source));
1363 rpd = xmalloc (sizeof *rpd);
1364 rpd->reader = dfm_open_reader (fh);
1365 rpd->first = rpd->last = NULL;
1366 rpd->starts_beg.num = 0;
1367 rpd->starts_beg.var = NULL;
1368 rpd->starts_end = rpd->occurs = rpd->length = rpd->cont_beg
1369 = rpd->cont_end = rpd->starts_beg;
1370 rpd->id_beg = rpd->id_end = 0;
1372 rpd->id_value = NULL;
1378 if (lex_match_id ("FILE"))
1380 struct file_handle *file;
1382 file = fh_parse (FH_REF_FILE | FH_REF_INLINE);
1387 msg (SE, _("REPEATING DATA must use the same file as its "
1388 "corresponding DATA LIST or FILE TYPE."));
1392 else if (lex_match_id ("STARTS"))
1397 msg (SE, _("%s subcommand given multiple times."),"STARTS");
1402 if (!parse_num_or_var (&rpd->starts_beg, "STARTS beginning column"))
1405 lex_negative_to_dash ();
1406 if (lex_match ('-'))
1408 if (!parse_num_or_var (&rpd->starts_end, "STARTS ending column"))
1411 /* Otherwise, rpd->starts_end is uninitialized. We
1412 will initialize it later from the record length
1413 of the file. We can't do so now because the
1414 file handle may not be specified yet. */
1417 if (rpd->starts_beg.num != 0 && rpd->starts_end.num != 0
1418 && rpd->starts_beg.num > rpd->starts_end.num)
1420 msg (SE, _("STARTS beginning column (%d) exceeds "
1421 "STARTS ending column (%d)."),
1422 rpd->starts_beg.num, rpd->starts_end.num);
1426 else if (lex_match_id ("OCCURS"))
1431 msg (SE, _("%s subcommand given multiple times."),"OCCURS");
1436 if (!parse_num_or_var (&rpd->occurs, "OCCURS"))
1439 else if (lex_match_id ("LENGTH"))
1444 msg (SE, _("%s subcommand given multiple times."),"LENGTH");
1449 if (!parse_num_or_var (&rpd->length, "LENGTH"))
1452 else if (lex_match_id ("CONTINUED"))
1457 msg (SE, _("%s subcommand given multiple times."),"CONTINUED");
1460 saw_continued = true;
1462 if (!lex_match ('/'))
1464 if (!parse_num_or_var (&rpd->cont_beg,
1465 "CONTINUED beginning column"))
1468 lex_negative_to_dash ();
1470 && !parse_num_or_var (&rpd->cont_end,
1471 "CONTINUED ending column"))
1474 if (rpd->cont_beg.num != 0 && rpd->cont_end.num != 0
1475 && rpd->cont_beg.num > rpd->cont_end.num)
1477 msg (SE, _("CONTINUED beginning column (%d) exceeds "
1478 "CONTINUED ending column (%d)."),
1479 rpd->cont_beg.num, rpd->cont_end.num);
1484 rpd->cont_beg.num = 1;
1486 else if (lex_match_id ("ID"))
1491 msg (SE, _("%s subcommand given multiple times."),"ID");
1496 if (!lex_force_int ())
1498 if (lex_integer () < 1)
1500 msg (SE, _("ID beginning column (%ld) must be positive."),
1504 rpd->id_beg = lex_integer ();
1507 lex_negative_to_dash ();
1509 if (lex_match ('-'))
1511 if (!lex_force_int ())
1513 if (lex_integer () < 1)
1515 msg (SE, _("ID ending column (%ld) must be positive."),
1519 if (lex_integer () < rpd->id_end)
1521 msg (SE, _("ID ending column (%ld) cannot be less than "
1522 "ID beginning column (%d)."),
1523 lex_integer (), rpd->id_beg);
1527 rpd->id_end = lex_integer ();
1530 else rpd->id_end = rpd->id_beg;
1532 if (!lex_force_match ('='))
1534 rpd->id_var = parse_variable ();
1535 if (rpd->id_var == NULL)
1538 find_variable_input_spec (rpd->id_var, &rpd->id_spec);
1539 rpd->id_value = xnmalloc (rpd->id_var->nv, sizeof *rpd->id_value);
1541 else if (lex_match_id ("TABLE"))
1543 else if (lex_match_id ("NOTABLE"))
1545 else if (lex_match_id ("DATA"))
1553 if (!lex_force_match ('/'))
1557 /* Comes here when DATA specification encountered. */
1558 if (!saw_starts || !saw_occurs)
1561 msg (SE, _("Missing required specification STARTS."));
1563 msg (SE, _("Missing required specification OCCURS."));
1567 /* Enforce ID restriction. */
1568 if (saw_id && !saw_continued)
1570 msg (SE, _("ID specified without CONTINUED."));
1574 /* Calculate and check starts_end, cont_end if necessary. */
1575 if (rpd->starts_end.num == 0 && rpd->starts_end.var == NULL)
1577 rpd->starts_end.num = fh_get_record_width (fh);
1578 if (rpd->starts_beg.num != 0
1579 && rpd->starts_beg.num > rpd->starts_end.num)
1581 msg (SE, _("STARTS beginning column (%d) exceeds "
1582 "default STARTS ending column taken from file's "
1583 "record width (%d)."),
1584 rpd->starts_beg.num, rpd->starts_end.num);
1588 if (rpd->cont_end.num == 0 && rpd->cont_end.var == NULL)
1590 rpd->cont_end.num = fh_get_record_width (fh);
1591 if (rpd->cont_beg.num != 0
1592 && rpd->cont_beg.num > rpd->cont_end.num)
1594 msg (SE, _("CONTINUED beginning column (%d) exceeds "
1595 "default CONTINUED ending column taken from file's "
1596 "record width (%d)."),
1597 rpd->cont_beg.num, rpd->cont_end.num);
1603 if (!parse_repeating_data (&rpd->first, &rpd->last))
1606 /* Calculate length if necessary. */
1609 struct dls_var_spec *iter;
1611 for (iter = rpd->first; iter; iter = iter->next)
1612 if (iter->lc > rpd->length.num)
1613 rpd->length.num = iter->lc;
1614 assert (rpd->length.num != 0);
1618 dump_fixed_table (rpd->first, fh, rpd->last->rec);
1620 add_transformation (repeating_data_trns_proc, repeating_data_trns_free, rpd);
1622 return lex_end_of_command ();
1625 repeating_data_trns_free (rpd);
1626 return CMD_CASCADING_FAILURE;
1629 /* Finds the input format specification for variable V and puts
1630 it in SPEC. Because of the way that DATA LIST is structured,
1631 this is nontrivial. */
1633 find_variable_input_spec (struct variable *v, struct fmt_spec *spec)
1637 for (i = 0; i < n_trns; i++)
1639 struct transformation *trns = &t_trns[i];
1641 if (trns->proc == data_list_trns_proc)
1643 struct data_list_pgm *pgm = trns->private;
1644 struct dls_var_spec *iter;
1646 for (iter = pgm->first; iter; iter = iter->next)
1649 *spec = iter->input;
1658 /* Parses a number or a variable name from the syntax file and puts
1659 the results in VALUE. Ensures that the number is at least 1; else
1660 emits an error based on MESSAGE. Returns nonzero only if
1663 parse_num_or_var (struct rpd_num_or_var *value, const char *message)
1668 value->var = parse_variable ();
1669 if (value->var == NULL)
1671 if (value->var->type == ALPHA)
1673 msg (SE, _("String variable not allowed here."));
1677 else if (lex_is_integer ())
1679 value->num = lex_integer ();
1683 msg (SE, _("%s (%d) must be at least 1."), message, value->num);
1689 msg (SE, _("Variable or integer expected for %s."), message);
1695 /* Parses data specifications for repeating data groups, adding
1696 them to the linked list with head FIRST and tail LAST.
1697 Returns nonzero only if successful. */
1699 parse_repeating_data (struct dls_var_spec **first, struct dls_var_spec **last)
1701 struct fixed_parsing_state fx;
1707 while (token != '.')
1709 if (!parse_DATA_LIST_vars (&fx.name, &fx.name_cnt, PV_NONE))
1712 if (lex_is_number ())
1714 if (!fixed_parse_compatible (&fx, first, last))
1717 else if (token == '(')
1719 if (!fixed_parse_fortran (&fx, first, last))
1724 msg (SE, _("SPSS-like or FORTRAN-like format "
1725 "specification expected after variable names."));
1729 for (i = 0; i < fx.name_cnt; i++)
1737 for (i = 0; i < fx.name_cnt; i++)
1743 /* Obtains the real value for rpd_num_or_var N in case C and returns
1744 it. The valid range is nonnegative numbers, but numbers outside
1745 this range can be returned and should be handled by the caller as
1748 realize_value (struct rpd_num_or_var *n, struct ccase *c)
1752 double v = case_num (c, n->var->fv);
1753 return v != SYSMIS && v >= INT_MIN && v <= INT_MAX ? v : -1;
1759 /* Parameter record passed to rpd_parse_record(). */
1760 struct rpd_parse_info
1762 struct repeating_data_trns *trns; /* REPEATING DATA transformation. */
1763 const char *line; /* Line being parsed. */
1764 size_t len; /* Line length. */
1765 int beg, end; /* First and last column of first occurrence. */
1766 int ofs; /* Column offset between repeated occurrences. */
1767 struct ccase *c; /* Case to fill in. */
1768 int verify_id; /* Zero to initialize ID, nonzero to verify it. */
1769 int max_occurs; /* Max number of occurrences to parse. */
1772 /* Parses one record of repeated data and outputs corresponding
1773 cases. Returns number of occurrences parsed up to the
1774 maximum specified in INFO. */
1776 rpd_parse_record (const struct rpd_parse_info *info)
1778 struct repeating_data_trns *t = info->trns;
1779 int cur = info->beg;
1782 /* Handle record ID values. */
1785 union value id_temp[MAX_ELEMS_PER_VALUE];
1787 /* Parse record ID into V. */
1791 data_in_finite_line (&di, info->line, info->len, t->id_beg, t->id_end);
1792 di.v = info->verify_id ? id_temp : t->id_value;
1795 di.format = t->id_spec;
1802 && compare_values (id_temp, t->id_value, t->id_var->width) != 0)
1804 char expected_str [MAX_FORMATTED_LEN + 1];
1805 char actual_str [MAX_FORMATTED_LEN + 1];
1807 data_out (expected_str, &t->id_var->print, t->id_value);
1808 expected_str[t->id_var->print.w] = '\0';
1810 data_out (actual_str, &t->id_var->print, id_temp);
1811 actual_str[t->id_var->print.w] = '\0';
1814 _("Encountered mismatched record ID \"%s\" expecting \"%s\"."),
1815 actual_str, expected_str);
1821 /* Iterate over the set of expected occurrences and record each of
1822 them as a separate case. FIXME: We need to execute any
1823 transformations that follow the current one. */
1827 for (occurrences = 0; occurrences < info->max_occurs; )
1829 if (cur + info->ofs > info->end + 1)
1834 struct dls_var_spec *var_spec = t->first;
1836 for (; var_spec; var_spec = var_spec->next)
1838 int fc = var_spec->fc - 1 + cur;
1839 int lc = var_spec->lc - 1 + cur;
1841 if (fc > info->len && !warned && var_spec->input.type != FMT_A)
1846 _("Variable %s starting in column %d extends "
1847 "beyond physical record length of %d."),
1848 var_spec->v->name, fc, info->len);
1854 data_in_finite_line (&di, info->line, info->len, fc, lc);
1855 di.v = case_data_rw (info->c, var_spec->fv);
1858 di.format = var_spec->input;
1868 if (!t->write_case (t->wc_data))
1876 /* Reads one set of repetitions of the elements in the REPEATING
1877 DATA structure. Returns TRNS_CONTINUE on success,
1878 TRNS_DROP_CASE on end of file or on failure, or TRNS_NEXT_CASE. */
1880 repeating_data_trns_proc (void *trns_, struct ccase *c, int case_num UNUSED)
1882 struct repeating_data_trns *t = trns_;
1884 struct fixed_string line; /* Current record. */
1886 int starts_beg; /* Starting column. */
1887 int starts_end; /* Ending column. */
1888 int occurs; /* Number of repetitions. */
1889 int length; /* Length of each occurrence. */
1890 int cont_beg; /* Starting column for continuation lines. */
1891 int cont_end; /* Ending column for continuation lines. */
1893 int occurs_left; /* Number of occurrences remaining. */
1895 int code; /* Return value from rpd_parse_record(). */
1897 int skip_first_record = 0;
1899 dfm_push (t->reader);
1901 /* Read the current record. */
1902 dfm_reread_record (t->reader, 1);
1903 dfm_expand_tabs (t->reader);
1904 if (dfm_eof (t->reader))
1905 return TRNS_DROP_CASE;
1906 dfm_get_record (t->reader, &line);
1907 dfm_forward_record (t->reader);
1909 /* Calculate occurs, length. */
1910 occurs_left = occurs = realize_value (&t->occurs, c);
1913 tmsg (SE, RPD_ERR, _("Invalid value %d for OCCURS."), occurs);
1914 return TRNS_NEXT_CASE;
1916 starts_beg = realize_value (&t->starts_beg, c);
1917 if (starts_beg <= 0)
1919 tmsg (SE, RPD_ERR, _("Beginning column for STARTS (%d) must be "
1922 return TRNS_NEXT_CASE;
1924 starts_end = realize_value (&t->starts_end, c);
1925 if (starts_end < starts_beg)
1927 tmsg (SE, RPD_ERR, _("Ending column for STARTS (%d) is less than "
1928 "beginning column (%d)."),
1929 starts_end, starts_beg);
1930 skip_first_record = 1;
1932 length = realize_value (&t->length, c);
1935 tmsg (SE, RPD_ERR, _("Invalid value %d for LENGTH."), length);
1937 occurs = occurs_left = 1;
1939 cont_beg = realize_value (&t->cont_beg, c);
1942 tmsg (SE, RPD_ERR, _("Beginning column for CONTINUED (%d) must be "
1945 return TRNS_DROP_CASE;
1947 cont_end = realize_value (&t->cont_end, c);
1948 if (cont_end < cont_beg)
1950 tmsg (SE, RPD_ERR, _("Ending column for CONTINUED (%d) is less than "
1951 "beginning column (%d)."),
1952 cont_end, cont_beg);
1953 return TRNS_DROP_CASE;
1956 /* Parse the first record. */
1957 if (!skip_first_record)
1959 struct rpd_parse_info info;
1961 info.line = ls_c_str (&line);
1962 info.len = ls_length (&line);
1963 info.beg = starts_beg;
1964 info.end = starts_end;
1968 info.max_occurs = occurs_left;
1969 code = rpd_parse_record (&info);
1971 return TRNS_DROP_CASE;
1972 occurs_left -= code;
1974 else if (cont_beg == 0)
1975 return TRNS_NEXT_CASE;
1977 /* Make sure, if some occurrences are left, that we have
1978 continuation records. */
1979 if (occurs_left > 0 && cont_beg == 0)
1982 _("Number of repetitions specified on OCCURS (%d) "
1983 "exceed number of repetitions available in "
1984 "space on STARTS (%d), and CONTINUED not specified."),
1985 occurs, (starts_end - starts_beg + 1) / length);
1986 return TRNS_DROP_CASE;
1989 /* Go on to additional records. */
1990 while (occurs_left != 0)
1992 struct rpd_parse_info info;
1994 assert (occurs_left >= 0);
1996 /* Read in another record. */
1997 if (dfm_eof (t->reader))
2000 _("Unexpected end of file with %d repetitions "
2001 "remaining out of %d."),
2002 occurs_left, occurs);
2003 return TRNS_DROP_CASE;
2005 dfm_expand_tabs (t->reader);
2006 dfm_get_record (t->reader, &line);
2007 dfm_forward_record (t->reader);
2009 /* Parse this record. */
2011 info.line = ls_c_str (&line);
2012 info.len = ls_length (&line);
2013 info.beg = cont_beg;
2014 info.end = cont_end;
2018 info.max_occurs = occurs_left;
2019 code = rpd_parse_record (&info);;
2021 return TRNS_DROP_CASE;
2022 occurs_left -= code;
2025 dfm_pop (t->reader);
2027 /* FIXME: This is a kluge until we've implemented multiplexing of
2029 return TRNS_NEXT_CASE;
2032 /* Frees a REPEATING DATA transformation.
2033 Returns true if successful, false if an I/O error occurred. */
2035 repeating_data_trns_free (void *rpd_)
2037 struct repeating_data_trns *rpd = rpd_;
2039 destroy_dls_var_spec (rpd->first);
2040 dfm_close_reader (rpd->reader);
2041 free (rpd->id_value);
2046 /* Lets repeating_data_trns_proc() know how to write the cases
2047 that it composes. Not elegant. */
2049 repeating_data_set_write_case (struct transformation *trns_,
2050 write_case_func *write_case,
2051 write_case_data wc_data)
2053 struct repeating_data_trns *t = trns_->private;
2055 assert (trns_->proc == repeating_data_trns_proc);
2056 t->write_case = write_case;
2057 t->wc_data = wc_data;