1 /* PSPP - computes sample statistics.
2 Copyright (C) 1997-9, 2000, 2006 Free Software Foundation, Inc.
3 Written by Ben Pfaff <blp@gnu.org>.
5 This program is free software; you can redistribute it and/or
6 modify it under the terms of the GNU General Public License as
7 published by the Free Software Foundation; either version 2 of the
8 License, or (at your option) any later version.
10 This program is distributed in the hope that it will be useful, but
11 WITHOUT ANY WARRANTY; without even the implied warranty of
12 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
13 General Public License for more details.
15 You should have received a copy of the GNU General Public License
16 along with this program; if not, write to the Free Software
17 Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
21 #include "data-list.h"
31 #include "debug-print.h"
33 #include "dictionary.h"
35 #include "file-handle.h"
46 #define _(msgid) gettext (msgid)
48 /* Utility function. */
50 /* FIXME: Either REPEATING DATA must be the last transformation, or we
51 must multiplex the transformations that follow (i.e., perform them
52 for every case that we produce from a repetition instance).
53 Currently we do neither. We should do one or the other. */
55 /* Describes how to parse one variable. */
58 struct dls_var_spec *next; /* Next specification in list. */
60 /* Both free and fixed formats. */
61 struct fmt_spec input; /* Input format of this field. */
62 struct variable *v; /* Associated variable. Used only in
63 parsing. Not safe later. */
64 int fv; /* First value in case. */
66 /* Fixed format only. */
67 int rec; /* Record number (1-based). */
68 int fc, lc; /* Column numbers in record. */
70 /* Free format only. */
71 char name[LONG_NAME_LEN + 1]; /* Name of variable. */
74 /* Constants for DATA LIST type. */
75 /* Must match table in cmd_data_list(). */
83 /* DATA LIST private data structure. */
86 struct dls_var_spec *first, *last; /* Variable parsing specifications. */
87 struct dfm_reader *reader; /* Data file reader. */
89 int type; /* A DLS_* constant. */
90 struct variable *end; /* Variable specified on END subcommand. */
91 int eof; /* End of file encountered. */
92 int rec_cnt; /* Number of records. */
93 size_t case_size; /* Case size in bytes. */
94 char *delims; /* Delimiters if any; not null-terminated. */
95 size_t delim_cnt; /* Number of delimiter, or 0 for spaces. */
98 static const struct case_source_class data_list_source_class;
100 static int parse_fixed (struct data_list_pgm *);
101 static int parse_free (struct dls_var_spec **, struct dls_var_spec **);
102 static void dump_fixed_table (const struct dls_var_spec *,
103 const struct file_handle *, int rec_cnt);
104 static void dump_free_table (const struct data_list_pgm *,
105 const struct file_handle *);
106 static void destroy_dls_var_spec (struct dls_var_spec *);
107 static trns_free_func data_list_trns_free;
108 static trns_proc_func data_list_trns_proc;
110 /* Message title for REPEATING DATA. */
111 #define RPD_ERR "REPEATING DATA: "
116 struct data_list_pgm *dls;
117 int table = -1; /* Print table if nonzero, -1=undecided. */
118 struct file_handle *fh = fh_inline_file ();
120 if (!case_source_is_complex (vfm_source))
121 discard_variables ();
123 dls = xmalloc (sizeof *dls);
131 dls->first = dls->last = NULL;
135 if (lex_match_id ("FILE"))
138 fh = fh_parse (FH_REF_FILE | FH_REF_INLINE);
141 if (case_source_is_class (vfm_source, &file_type_source_class)
142 && fh != fh_get_default_handle ())
144 msg (SE, _("DATA LIST must use the same file "
145 "as the enclosing FILE TYPE."));
149 else if (lex_match_id ("RECORDS"))
153 if (!lex_force_int ())
155 dls->rec_cnt = lex_integer ();
159 else if (lex_match_id ("END"))
163 msg (SE, _("The END subcommand may only be specified once."));
168 if (!lex_force_id ())
170 dls->end = dict_lookup_var (default_dict, tokid);
172 dls->end = dict_create_var_assert (default_dict, tokid, 0);
175 else if (token == T_ID)
177 if (lex_match_id ("NOTABLE"))
179 else if (lex_match_id ("TABLE"))
184 if (lex_match_id ("FIXED"))
186 else if (lex_match_id ("FREE"))
188 else if (lex_match_id ("LIST"))
198 msg (SE, _("Only one of FIXED, FREE, or LIST may "
204 if ((dls->type == DLS_FREE || dls->type == DLS_LIST)
207 while (!lex_match (')'))
211 if (lex_match_id ("TAB"))
213 else if (token == T_STRING && tokstr.length == 1)
215 delim = tokstr.string[0];
224 dls->delims = xrealloc (dls->delims, dls->delim_cnt + 1);
225 dls->delims[dls->delim_cnt++] = delim;
239 dls->case_size = dict_get_case_size (default_dict);
240 fh_set_default_handle (fh);
243 dls->type = DLS_FIXED;
247 if (dls->type == DLS_FREE)
253 if (dls->type == DLS_FIXED)
255 if (!parse_fixed (dls))
258 dump_fixed_table (dls->first, fh, dls->rec_cnt);
262 if (!parse_free (&dls->first, &dls->last))
265 dump_free_table (dls, fh);
268 dls->reader = dfm_open_reader (fh);
269 if (dls->reader == NULL)
272 if (vfm_source != NULL)
273 add_transformation (data_list_trns_proc, data_list_trns_free, dls);
275 vfm_source = create_case_source (&data_list_source_class, dls);
280 data_list_trns_free (dls);
284 /* Adds SPEC to the linked list with head at FIRST and tail at
287 append_var_spec (struct dls_var_spec **first, struct dls_var_spec **last,
288 struct dls_var_spec *spec)
295 (*last)->next = spec;
299 /* Fixed-format parsing. */
301 /* Used for chaining together fortran-like format specifiers. */
304 struct fmt_list *next;
307 struct fmt_list *down;
310 /* State of parsing DATA LIST. */
311 struct fixed_parsing_state
313 char **name; /* Variable names. */
314 size_t name_cnt; /* Number of names. */
316 int recno; /* Index of current record. */
317 int sc; /* 1-based column number of starting column for
318 next field to output. */
321 static int fixed_parse_compatible (struct fixed_parsing_state *,
322 struct dls_var_spec **,
323 struct dls_var_spec **);
324 static int fixed_parse_fortran (struct fixed_parsing_state *,
325 struct dls_var_spec **,
326 struct dls_var_spec **);
328 /* Parses all the variable specifications for DATA LIST FIXED,
329 storing them into DLS. Returns nonzero if successful. */
331 parse_fixed (struct data_list_pgm *dls)
333 struct fixed_parsing_state fx;
341 while (lex_match ('/'))
344 if (lex_is_integer ())
346 if (lex_integer () < fx.recno)
348 msg (SE, _("The record number specified, %ld, is "
349 "before the previous record, %d. Data "
350 "fields must be listed in order of "
351 "increasing record number."),
352 lex_integer (), fx.recno - 1);
356 fx.recno = lex_integer ();
362 if (!parse_DATA_LIST_vars (&fx.name, &fx.name_cnt, PV_NONE))
365 if (lex_is_number ())
367 if (!fixed_parse_compatible (&fx, &dls->first, &dls->last))
370 else if (token == '(')
372 if (!fixed_parse_fortran (&fx, &dls->first, &dls->last))
377 msg (SE, _("SPSS-like or FORTRAN-like format "
378 "specification expected after variable names."));
382 for (i = 0; i < fx.name_cnt; i++)
386 if (dls->first == NULL)
388 msg (SE, _("At least one variable must be specified."));
391 if (dls->rec_cnt && dls->last->rec > dls->rec_cnt)
393 msg (SE, _("Variables are specified on records that "
394 "should not exist according to RECORDS subcommand."));
397 else if (!dls->rec_cnt)
398 dls->rec_cnt = dls->last->rec;
399 return lex_end_of_command () == CMD_SUCCESS;
402 for (i = 0; i < fx.name_cnt; i++)
408 /* Parses a variable specification in the form 1-10 (A) based on
409 FX and adds specifications to the linked list with head at
410 FIRST and tail at LAST. */
412 fixed_parse_compatible (struct fixed_parsing_state *fx,
413 struct dls_var_spec **first, struct dls_var_spec **last)
415 struct fmt_spec input;
421 if (!lex_force_int ())
426 msg (SE, _("Column positions for fields must be positive."));
432 lex_negative_to_dash ();
435 if (!lex_force_int ())
440 msg (SE, _("Column positions for fields must be positive."));
445 msg (SE, _("The ending column for a field must be "
446 "greater than the starting column."));
455 /* Divide columns evenly. */
456 input.w = (lc - fc + 1) / fx->name_cnt;
457 if ((lc - fc + 1) % fx->name_cnt)
459 msg (SE, _("The %d columns %d-%d "
460 "can't be evenly divided into %d fields."),
461 lc - fc + 1, fc, lc, fx->name_cnt);
465 /* Format specifier. */
468 struct fmt_desc *fdp;
474 input.type = parse_format_specifier_name (&cp, 0);
475 if (input.type == -1)
479 msg (SE, _("A format specifier on this line "
480 "has extra characters on the end."));
490 if (lex_is_integer ())
492 if (lex_integer () < 1)
494 msg (SE, _("The value for number of decimal places "
495 "must be at least 1."));
499 input.d = lex_integer ();
505 fdp = &formats[input.type];
506 if (fdp->n_args < 2 && input.d)
508 msg (SE, _("Input format %s doesn't accept decimal places."),
516 if (!lex_force_match (')'))
524 if (!check_input_specifier (&input, 1))
527 /* Start column for next specification. */
530 /* Width of variables to create. */
531 if (input.type == FMT_A || input.type == FMT_AHEX)
536 /* Create variables and var specs. */
537 for (i = 0; i < fx->name_cnt; i++)
539 struct dls_var_spec *spec;
542 v = dict_create_var (default_dict, fx->name[i], width);
545 convert_fmt_ItoO (&input, &v->print);
547 if (!case_source_is_complex (vfm_source))
552 v = dict_lookup_var_assert (default_dict, fx->name[i]);
553 if (vfm_source == NULL)
555 msg (SE, _("%s is a duplicate variable name."), fx->name[i]);
558 if ((width != 0) != (v->width != 0))
560 msg (SE, _("There is already a variable %s of a "
565 if (width != 0 && width != v->width)
567 msg (SE, _("There is already a string variable %s of a "
568 "different width."), fx->name[i]);
573 spec = xmalloc (sizeof *spec);
577 spec->rec = fx->recno;
578 spec->fc = fc + input.w * i;
579 spec->lc = spec->fc + input.w - 1;
580 append_var_spec (first, last, spec);
585 /* Destroy format list F and, if RECURSE is nonzero, all its
588 destroy_fmt_list (struct fmt_list *f, int recurse)
590 struct fmt_list *next;
595 if (recurse && f->f.type == FMT_DESCEND)
596 destroy_fmt_list (f->down, 1);
601 /* Takes a hierarchically structured fmt_list F as constructed by
602 fixed_parse_fortran(), and flattens it, adding the variable
603 specifications to the linked list with head FIRST and tail
604 LAST. NAME_IDX is used to take values from the list of names
605 in FX; it should initially point to a value of 0. */
607 dump_fmt_list (struct fixed_parsing_state *fx, struct fmt_list *f,
608 struct dls_var_spec **first, struct dls_var_spec **last,
613 for (; f; f = f->next)
614 if (f->f.type == FMT_X)
616 else if (f->f.type == FMT_T)
618 else if (f->f.type == FMT_NEWREC)
620 fx->recno += f->count;
624 for (i = 0; i < f->count; i++)
625 if (f->f.type == FMT_DESCEND)
627 if (!dump_fmt_list (fx, f->down, first, last, name_idx))
632 struct dls_var_spec *spec;
636 if (formats[f->f.type].cat & FCAT_STRING)
640 if (*name_idx >= fx->name_cnt)
642 msg (SE, _("The number of format "
643 "specifications exceeds the given number of "
648 v = dict_create_var (default_dict, fx->name[(*name_idx)++], width);
651 msg (SE, _("%s is a duplicate variable name."), fx->name[i]);
655 if (!case_source_is_complex (vfm_source))
658 spec = xmalloc (sizeof *spec);
662 spec->rec = fx->recno;
664 spec->lc = fx->sc + f->f.w - 1;
665 append_var_spec (first, last, spec);
667 convert_fmt_ItoO (&spec->input, &v->print);
675 /* Recursively parses a FORTRAN-like format specification into
676 the linked list with head FIRST and tail TAIL. LEVEL is the
677 level of recursion, starting from 0. Returns the parsed
678 specification if successful, or a null pointer on failure. */
679 static struct fmt_list *
680 fixed_parse_fortran_internal (struct fixed_parsing_state *fx,
681 struct dls_var_spec **first,
682 struct dls_var_spec **last)
684 struct fmt_list *head = NULL;
685 struct fmt_list *tail = NULL;
687 lex_force_match ('(');
691 struct fmt_list *new = xmalloc (sizeof *new);
694 /* Append new to list. */
702 if (lex_is_integer ())
704 new->count = lex_integer ();
710 /* Parse format specifier. */
713 new->f.type = FMT_DESCEND;
714 new->down = fixed_parse_fortran_internal (fx, first, last);
715 if (new->down == NULL)
718 else if (lex_match ('/'))
719 new->f.type = FMT_NEWREC;
720 else if (!parse_format_specifier (&new->f, FMTP_ALLOW_XT)
721 || !check_input_specifier (&new->f, 1))
726 lex_force_match (')');
731 destroy_fmt_list (head, 0);
736 /* Parses a FORTRAN-like format specification into the linked
737 list with head FIRST and tail LAST. Returns nonzero if
740 fixed_parse_fortran (struct fixed_parsing_state *fx,
741 struct dls_var_spec **first, struct dls_var_spec **last)
743 struct fmt_list *list;
746 list = fixed_parse_fortran_internal (fx, first, last);
751 dump_fmt_list (fx, list, first, last, &name_idx);
752 destroy_fmt_list (list, 1);
753 if (name_idx < fx->name_cnt)
755 msg (SE, _("There aren't enough format specifications "
756 "to match the number of variable names given."));
763 /* Displays a table giving information on fixed-format variable
764 parsing on DATA LIST. */
765 /* FIXME: The `Columns' column should be divided into three columns,
766 one for the starting column, one for the dash, one for the ending
767 column; then right-justify the starting column and left-justify the
770 dump_fixed_table (const struct dls_var_spec *specs,
771 const struct file_handle *fh, int rec_cnt)
773 const struct dls_var_spec *spec;
777 for (i = 0, spec = specs; spec; spec = spec->next)
779 t = tab_create (4, i + 1, 0);
780 tab_columns (t, TAB_COL_DOWN, 1);
781 tab_headers (t, 0, 0, 1, 0);
782 tab_text (t, 0, 0, TAB_CENTER | TAT_TITLE, _("Variable"));
783 tab_text (t, 1, 0, TAB_CENTER | TAT_TITLE, _("Record"));
784 tab_text (t, 2, 0, TAB_CENTER | TAT_TITLE, _("Columns"));
785 tab_text (t, 3, 0, TAB_CENTER | TAT_TITLE, _("Format"));
786 tab_box (t, TAL_1, TAL_1, TAL_0, TAL_1, 0, 0, 3, i);
787 tab_hline (t, TAL_2, 0, 3, 1);
788 tab_dim (t, tab_natural_dimensions);
790 for (i = 1, spec = specs; spec; spec = spec->next, i++)
792 tab_text (t, 0, i, TAB_LEFT, spec->v->name);
793 tab_text (t, 1, i, TAT_PRINTF, "%d", spec->rec);
794 tab_text (t, 2, i, TAT_PRINTF, "%3d-%3d",
796 tab_text (t, 3, i, TAB_LEFT | TAT_FIX,
797 fmt_to_string (&spec->input));
800 tab_title (t, 1, ngettext ("Reading %d record from %s.",
801 "Reading %d records from %s.", rec_cnt),
802 rec_cnt, fh_get_name (fh));
806 /* Free-format parsing. */
808 /* Parses variable specifications for DATA LIST FREE and adds
809 them to the linked list with head FIRST and tail LAST.
810 Returns nonzero only if successful. */
812 parse_free (struct dls_var_spec **first, struct dls_var_spec **last)
817 struct fmt_spec input, output;
823 if (!parse_DATA_LIST_vars (&name, &name_cnt, PV_NONE))
828 if (!parse_format_specifier (&input, 0)
829 || !check_input_specifier (&input, 1)
830 || !lex_force_match (')'))
832 for (i = 0; i < name_cnt; i++)
837 convert_fmt_ItoO (&input, &output);
842 input = make_input_format (FMT_F, 8, 0);
843 output = *get_format ();
846 if (input.type == FMT_A || input.type == FMT_AHEX)
850 for (i = 0; i < name_cnt; i++)
852 struct dls_var_spec *spec;
855 v = dict_create_var (default_dict, name[i], width);
859 msg (SE, _("%s is a duplicate variable name."), name[i]);
862 v->print = v->write = output;
864 if (!case_source_is_complex (vfm_source))
867 spec = xmalloc (sizeof *spec);
871 str_copy_trunc (spec->name, sizeof spec->name, v->name);
872 append_var_spec (first, last, spec);
874 for (i = 0; i < name_cnt; i++)
879 return lex_end_of_command () == CMD_SUCCESS;
882 /* Displays a table giving information on free-format variable parsing
885 dump_free_table (const struct data_list_pgm *dls,
886 const struct file_handle *fh)
892 struct dls_var_spec *spec;
893 for (i = 0, spec = dls->first; spec; spec = spec->next)
897 t = tab_create (2, i + 1, 0);
898 tab_columns (t, TAB_COL_DOWN, 1);
899 tab_headers (t, 0, 0, 1, 0);
900 tab_text (t, 0, 0, TAB_CENTER | TAT_TITLE, _("Variable"));
901 tab_text (t, 1, 0, TAB_CENTER | TAT_TITLE, _("Format"));
902 tab_box (t, TAL_1, TAL_1, TAL_0, TAL_1, 0, 0, 1, i);
903 tab_hline (t, TAL_2, 0, 1, 1);
904 tab_dim (t, tab_natural_dimensions);
907 struct dls_var_spec *spec;
909 for (i = 1, spec = dls->first; spec; spec = spec->next, i++)
911 tab_text (t, 0, i, TAB_LEFT, spec->v->name);
912 tab_text (t, 1, i, TAB_LEFT | TAT_FIX, fmt_to_string (&spec->input));
916 tab_title (t, 1, _("Reading free-form data from %s."), fh_get_name (fh));
921 /* Input procedure. */
923 /* Extracts a field from the current position in the current
924 record. Fields can be unquoted or quoted with single- or
925 double-quote characters. *FIELD is set to the field content.
926 After parsing the field, sets the current position in the
927 record to just past the field and any trailing delimiter.
928 END_BLANK is used internally; it should be initialized by the
929 caller to 0 and left alone afterward. Returns 0 on failure or
930 a 1-based column number indicating the beginning of the field
933 cut_field (const struct data_list_pgm *dls, struct fixed_string *field,
936 struct fixed_string line;
940 if (dfm_eof (dls->reader))
942 if (dls->delim_cnt == 0)
943 dfm_expand_tabs (dls->reader);
944 dfm_get_record (dls->reader, &line);
946 cp = ls_c_str (&line);
947 if (dls->delim_cnt == 0)
949 /* Skip leading whitespace. */
950 while (cp < ls_end (&line) && isspace ((unsigned char) *cp))
952 if (cp >= ls_end (&line))
955 /* Handle actual data, whether quoted or unquoted. */
956 if (*cp == '\'' || *cp == '"')
960 field->string = ++cp;
961 while (cp < ls_end (&line) && *cp != quote)
963 field->length = cp - field->string;
964 if (cp < ls_end (&line))
967 msg (SW, _("Quoted string missing terminating `%c'."), quote);
972 while (cp < ls_end (&line)
973 && !isspace ((unsigned char) *cp) && *cp != ',')
975 field->length = cp - field->string;
978 /* Skip trailing whitespace and a single comma if present. */
979 while (cp < ls_end (&line) && isspace ((unsigned char) *cp))
981 if (cp < ls_end (&line) && *cp == ',')
986 if (cp >= ls_end (&line))
988 int column = dfm_column_start (dls->reader);
989 /* A blank line or a line that ends in \t has a
990 trailing blank field. */
991 if (column == 1 || (column > 1 && cp[-1] == '\t'))
996 field->string = ls_end (&line);
998 dfm_forward_record (dls->reader);
1013 while (cp < ls_end (&line)
1014 && memchr (dls->delims, *cp, dls->delim_cnt) == NULL)
1016 field->length = cp - field->string;
1017 if (cp < ls_end (&line))
1022 dfm_forward_columns (dls->reader, field->string - line.string);
1023 column_start = dfm_column_start (dls->reader);
1025 dfm_forward_columns (dls->reader, cp - field->string);
1027 return column_start;
1030 typedef int data_list_read_func (const struct data_list_pgm *, struct ccase *);
1031 static data_list_read_func read_from_data_list_fixed;
1032 static data_list_read_func read_from_data_list_free;
1033 static data_list_read_func read_from_data_list_list;
1035 /* Returns the proper function to read the kind of DATA LIST
1036 data specified by DLS. */
1037 static data_list_read_func *
1038 get_data_list_read_func (const struct data_list_pgm *dls)
1043 return read_from_data_list_fixed;
1046 return read_from_data_list_free;
1049 return read_from_data_list_list;
1057 /* Reads a case from the data file into C, parsing it according
1058 to fixed-format syntax rules in DLS. Returns -1 on success,
1059 -2 at end of file. */
1061 read_from_data_list_fixed (const struct data_list_pgm *dls,
1064 struct dls_var_spec *var_spec = dls->first;
1067 if (dfm_eof (dls->reader))
1069 for (i = 1; i <= dls->rec_cnt; i++)
1071 struct fixed_string line;
1073 if (dfm_eof (dls->reader))
1075 /* Note that this can't occur on the first record. */
1076 msg (SW, _("Partial case of %d of %d records discarded."),
1077 i - 1, dls->rec_cnt);
1080 dfm_expand_tabs (dls->reader);
1081 dfm_get_record (dls->reader, &line);
1083 for (; var_spec && i == var_spec->rec; var_spec = var_spec->next)
1087 data_in_finite_line (&di, ls_c_str (&line), ls_length (&line),
1088 var_spec->fc, var_spec->lc);
1089 di.v = case_data_rw (c, var_spec->fv);
1090 di.flags = DI_IMPLIED_DECIMALS;
1091 di.f1 = var_spec->fc;
1092 di.format = var_spec->input;
1097 dfm_forward_record (dls->reader);
1103 /* Reads a case from the data file into C, parsing it according
1104 to free-format syntax rules in DLS. Returns -1 on success,
1105 -2 at end of file. */
1107 read_from_data_list_free (const struct data_list_pgm *dls,
1110 struct dls_var_spec *var_spec;
1113 for (var_spec = dls->first; var_spec; var_spec = var_spec->next)
1115 struct fixed_string field;
1118 /* Cut out a field and read in a new record if necessary. */
1121 column = cut_field (dls, &field, &end_blank);
1125 if (!dfm_eof (dls->reader))
1126 dfm_forward_record (dls->reader);
1127 if (dfm_eof (dls->reader))
1129 if (var_spec != dls->first)
1130 msg (SW, _("Partial case discarded. The first variable "
1131 "missing was %s."), var_spec->name);
1139 di.s = ls_c_str (&field);
1140 di.e = ls_end (&field);
1141 di.v = case_data_rw (c, var_spec->fv);
1144 di.format = var_spec->input;
1151 /* Reads a case from the data file and parses it according to
1152 list-format syntax rules. Returns -1 on success, -2 at end of
1155 read_from_data_list_list (const struct data_list_pgm *dls,
1158 struct dls_var_spec *var_spec;
1161 if (dfm_eof (dls->reader))
1164 for (var_spec = dls->first; var_spec; var_spec = var_spec->next)
1166 struct fixed_string field;
1169 /* Cut out a field and check for end-of-line. */
1170 column = cut_field (dls, &field, &end_blank);
1173 if (get_undefined ())
1174 msg (SW, _("Missing value(s) for all variables from %s onward. "
1175 "These will be filled with the system-missing value "
1176 "or blanks, as appropriate."),
1178 for (; var_spec; var_spec = var_spec->next)
1180 int width = get_format_var_width (&var_spec->input);
1182 case_data_rw (c, var_spec->fv)->f = SYSMIS;
1184 memset (case_data_rw (c, var_spec->fv)->s, ' ', width);
1192 di.s = ls_c_str (&field);
1193 di.e = ls_end (&field);
1194 di.v = case_data_rw (c, var_spec->fv);
1197 di.format = var_spec->input;
1202 dfm_forward_record (dls->reader);
1206 /* Destroys SPEC. */
1208 destroy_dls_var_spec (struct dls_var_spec *spec)
1210 struct dls_var_spec *next;
1212 while (spec != NULL)
1220 /* Destroys DATA LIST transformation DLS. */
1222 data_list_trns_free (void *dls_)
1224 struct data_list_pgm *dls = dls_;
1226 destroy_dls_var_spec (dls->first);
1227 dfm_close_reader (dls->reader);
1231 /* Handle DATA LIST transformation DLS, parsing data into C. */
1233 data_list_trns_proc (void *dls_, struct ccase *c, int case_num UNUSED)
1235 struct data_list_pgm *dls = dls_;
1236 data_list_read_func *read_func;
1239 dfm_push (dls->reader);
1241 read_func = get_data_list_read_func (dls);
1242 retval = read_func (dls, c);
1244 /* Handle end of file. */
1247 /* If we already encountered end of file then this is an
1251 msg (SE, _("Attempt to read past end of file."));
1253 dfm_pop (dls->reader);
1257 /* Otherwise simply note it. */
1263 /* If there was an END subcommand handle it. */
1264 if (dls->end != NULL)
1268 case_data_rw (c, dls->end->fv)->f = 1.0;
1272 case_data_rw (c, dls->end->fv)->f = 0.0;
1275 dfm_pop (dls->reader);
1280 /* Reads all the records from the data file and passes them to
1283 data_list_source_read (struct case_source *source,
1285 write_case_func *write_case, write_case_data wc_data)
1287 struct data_list_pgm *dls = source->aux;
1288 data_list_read_func *read_func = get_data_list_read_func (dls);
1290 dfm_push (dls->reader);
1291 while (read_func (dls, c) != -2)
1292 if (!write_case (wc_data))
1294 dfm_pop (dls->reader);
1297 /* Destroys the source's internal data. */
1299 data_list_source_destroy (struct case_source *source)
1301 data_list_trns_free (source->aux);
1304 static const struct case_source_class data_list_source_class =
1308 data_list_source_read,
1309 data_list_source_destroy,
1312 /* REPEATING DATA. */
1314 /* Represents a number or a variable. */
1315 struct rpd_num_or_var
1317 int num; /* Value, or 0. */
1318 struct variable *var; /* Variable, if number==0. */
1321 /* REPEATING DATA private data structure. */
1322 struct repeating_data_trns
1324 struct dls_var_spec *first, *last; /* Variable parsing specifications. */
1325 struct dfm_reader *reader; /* Input file, never NULL. */
1327 struct rpd_num_or_var starts_beg; /* STARTS=, before the dash. */
1328 struct rpd_num_or_var starts_end; /* STARTS=, after the dash. */
1329 struct rpd_num_or_var occurs; /* OCCURS= subcommand. */
1330 struct rpd_num_or_var length; /* LENGTH= subcommand. */
1331 struct rpd_num_or_var cont_beg; /* CONTINUED=, before the dash. */
1332 struct rpd_num_or_var cont_end; /* CONTINUED=, after the dash. */
1334 /* ID subcommand. */
1335 int id_beg, id_end; /* Beginning & end columns. */
1336 struct variable *id_var; /* DATA LIST variable. */
1337 struct fmt_spec id_spec; /* Input format spec. */
1338 union value *id_value; /* ID value. */
1340 write_case_func *write_case;
1341 write_case_data wc_data;
1344 static trns_free_func repeating_data_trns_free;
1345 static int parse_num_or_var (struct rpd_num_or_var *, const char *);
1346 static int parse_repeating_data (struct dls_var_spec **,
1347 struct dls_var_spec **);
1348 static void find_variable_input_spec (struct variable *v,
1349 struct fmt_spec *spec);
1351 /* Parses the REPEATING DATA command. */
1353 cmd_repeating_data (void)
1355 struct repeating_data_trns *rpd;
1356 int table = 1; /* Print table? */
1357 bool saw_starts = false; /* Saw STARTS subcommand? */
1358 bool saw_occurs = false; /* Saw OCCURS subcommand? */
1359 bool saw_length = false; /* Saw LENGTH subcommand? */
1360 bool saw_continued = false; /* Saw CONTINUED subcommand? */
1361 bool saw_id = false; /* Saw ID subcommand? */
1362 struct file_handle *const fh = fh_get_default_handle ();
1364 assert (case_source_is_complex (vfm_source));
1366 rpd = xmalloc (sizeof *rpd);
1367 rpd->reader = dfm_open_reader (fh);
1368 rpd->first = rpd->last = NULL;
1369 rpd->starts_beg.num = 0;
1370 rpd->starts_beg.var = NULL;
1371 rpd->starts_end = rpd->occurs = rpd->length = rpd->cont_beg
1372 = rpd->cont_end = rpd->starts_beg;
1373 rpd->id_beg = rpd->id_end = 0;
1375 rpd->id_value = NULL;
1381 if (lex_match_id ("FILE"))
1383 struct file_handle *file;
1385 file = fh_parse (FH_REF_FILE | FH_REF_INLINE);
1390 msg (SE, _("REPEATING DATA must use the same file as its "
1391 "corresponding DATA LIST or FILE TYPE."));
1395 else if (lex_match_id ("STARTS"))
1400 msg (SE, _("%s subcommand given multiple times."),"STARTS");
1405 if (!parse_num_or_var (&rpd->starts_beg, "STARTS beginning column"))
1408 lex_negative_to_dash ();
1409 if (lex_match ('-'))
1411 if (!parse_num_or_var (&rpd->starts_end, "STARTS ending column"))
1414 /* Otherwise, rpd->starts_end is uninitialized. We
1415 will initialize it later from the record length
1416 of the file. We can't do so now because the
1417 file handle may not be specified yet. */
1420 if (rpd->starts_beg.num != 0 && rpd->starts_end.num != 0
1421 && rpd->starts_beg.num > rpd->starts_end.num)
1423 msg (SE, _("STARTS beginning column (%d) exceeds "
1424 "STARTS ending column (%d)."),
1425 rpd->starts_beg.num, rpd->starts_end.num);
1429 else if (lex_match_id ("OCCURS"))
1434 msg (SE, _("%s subcommand given multiple times."),"OCCURS");
1439 if (!parse_num_or_var (&rpd->occurs, "OCCURS"))
1442 else if (lex_match_id ("LENGTH"))
1447 msg (SE, _("%s subcommand given multiple times."),"LENGTH");
1452 if (!parse_num_or_var (&rpd->length, "LENGTH"))
1455 else if (lex_match_id ("CONTINUED"))
1460 msg (SE, _("%s subcommand given multiple times."),"CONTINUED");
1463 saw_continued = true;
1465 if (!lex_match ('/'))
1467 if (!parse_num_or_var (&rpd->cont_beg,
1468 "CONTINUED beginning column"))
1471 lex_negative_to_dash ();
1473 && !parse_num_or_var (&rpd->cont_end,
1474 "CONTINUED ending column"))
1477 if (rpd->cont_beg.num != 0 && rpd->cont_end.num != 0
1478 && rpd->cont_beg.num > rpd->cont_end.num)
1480 msg (SE, _("CONTINUED beginning column (%d) exceeds "
1481 "CONTINUED ending column (%d)."),
1482 rpd->cont_beg.num, rpd->cont_end.num);
1487 rpd->cont_beg.num = 1;
1489 else if (lex_match_id ("ID"))
1494 msg (SE, _("%s subcommand given multiple times."),"ID");
1499 if (!lex_force_int ())
1501 if (lex_integer () < 1)
1503 msg (SE, _("ID beginning column (%ld) must be positive."),
1507 rpd->id_beg = lex_integer ();
1510 lex_negative_to_dash ();
1512 if (lex_match ('-'))
1514 if (!lex_force_int ())
1516 if (lex_integer () < 1)
1518 msg (SE, _("ID ending column (%ld) must be positive."),
1522 if (lex_integer () < rpd->id_end)
1524 msg (SE, _("ID ending column (%ld) cannot be less than "
1525 "ID beginning column (%d)."),
1526 lex_integer (), rpd->id_beg);
1530 rpd->id_end = lex_integer ();
1533 else rpd->id_end = rpd->id_beg;
1535 if (!lex_force_match ('='))
1537 rpd->id_var = parse_variable ();
1538 if (rpd->id_var == NULL)
1541 find_variable_input_spec (rpd->id_var, &rpd->id_spec);
1542 rpd->id_value = xnmalloc (rpd->id_var->nv, sizeof *rpd->id_value);
1544 else if (lex_match_id ("TABLE"))
1546 else if (lex_match_id ("NOTABLE"))
1548 else if (lex_match_id ("DATA"))
1556 if (!lex_force_match ('/'))
1560 /* Comes here when DATA specification encountered. */
1561 if (!saw_starts || !saw_occurs)
1564 msg (SE, _("Missing required specification STARTS."));
1566 msg (SE, _("Missing required specification OCCURS."));
1570 /* Enforce ID restriction. */
1571 if (saw_id && !saw_continued)
1573 msg (SE, _("ID specified without CONTINUED."));
1577 /* Calculate and check starts_end, cont_end if necessary. */
1578 if (rpd->starts_end.num == 0 && rpd->starts_end.var == NULL)
1580 rpd->starts_end.num = fh_get_record_width (fh);
1581 if (rpd->starts_beg.num != 0
1582 && rpd->starts_beg.num > rpd->starts_end.num)
1584 msg (SE, _("STARTS beginning column (%d) exceeds "
1585 "default STARTS ending column taken from file's "
1586 "record width (%d)."),
1587 rpd->starts_beg.num, rpd->starts_end.num);
1591 if (rpd->cont_end.num == 0 && rpd->cont_end.var == NULL)
1593 rpd->cont_end.num = fh_get_record_width (fh);
1594 if (rpd->cont_beg.num != 0
1595 && rpd->cont_beg.num > rpd->cont_end.num)
1597 msg (SE, _("CONTINUED beginning column (%d) exceeds "
1598 "default CONTINUED ending column taken from file's "
1599 "record width (%d)."),
1600 rpd->cont_beg.num, rpd->cont_end.num);
1606 if (!parse_repeating_data (&rpd->first, &rpd->last))
1609 /* Calculate length if necessary. */
1612 struct dls_var_spec *iter;
1614 for (iter = rpd->first; iter; iter = iter->next)
1615 if (iter->lc > rpd->length.num)
1616 rpd->length.num = iter->lc;
1617 assert (rpd->length.num != 0);
1621 dump_fixed_table (rpd->first, fh, rpd->last->rec);
1623 add_transformation (repeating_data_trns_proc, repeating_data_trns_free, rpd);
1625 return lex_end_of_command ();
1628 repeating_data_trns_free (rpd);
1632 /* Finds the input format specification for variable V and puts
1633 it in SPEC. Because of the way that DATA LIST is structured,
1634 this is nontrivial. */
1636 find_variable_input_spec (struct variable *v, struct fmt_spec *spec)
1640 for (i = 0; i < n_trns; i++)
1642 struct transformation *trns = &t_trns[i];
1644 if (trns->proc == data_list_trns_proc)
1646 struct data_list_pgm *pgm = trns->private;
1647 struct dls_var_spec *iter;
1649 for (iter = pgm->first; iter; iter = iter->next)
1652 *spec = iter->input;
1661 /* Parses a number or a variable name from the syntax file and puts
1662 the results in VALUE. Ensures that the number is at least 1; else
1663 emits an error based on MESSAGE. Returns nonzero only if
1666 parse_num_or_var (struct rpd_num_or_var *value, const char *message)
1671 value->var = parse_variable ();
1672 if (value->var == NULL)
1674 if (value->var->type == ALPHA)
1676 msg (SE, _("String variable not allowed here."));
1680 else if (lex_is_integer ())
1682 value->num = lex_integer ();
1686 msg (SE, _("%s (%d) must be at least 1."), message, value->num);
1692 msg (SE, _("Variable or integer expected for %s."), message);
1698 /* Parses data specifications for repeating data groups, adding
1699 them to the linked list with head FIRST and tail LAST.
1700 Returns nonzero only if successful. */
1702 parse_repeating_data (struct dls_var_spec **first, struct dls_var_spec **last)
1704 struct fixed_parsing_state fx;
1710 while (token != '.')
1712 if (!parse_DATA_LIST_vars (&fx.name, &fx.name_cnt, PV_NONE))
1715 if (lex_is_number ())
1717 if (!fixed_parse_compatible (&fx, first, last))
1720 else if (token == '(')
1722 if (!fixed_parse_fortran (&fx, first, last))
1727 msg (SE, _("SPSS-like or FORTRAN-like format "
1728 "specification expected after variable names."));
1732 for (i = 0; i < fx.name_cnt; i++)
1740 for (i = 0; i < fx.name_cnt; i++)
1746 /* Obtains the real value for rpd_num_or_var N in case C and returns
1747 it. The valid range is nonnegative numbers, but numbers outside
1748 this range can be returned and should be handled by the caller as
1751 realize_value (struct rpd_num_or_var *n, struct ccase *c)
1755 double v = case_num (c, n->var->fv);
1756 return v != SYSMIS && v >= INT_MIN && v <= INT_MAX ? v : -1;
1762 /* Parameter record passed to rpd_parse_record(). */
1763 struct rpd_parse_info
1765 struct repeating_data_trns *trns; /* REPEATING DATA transformation. */
1766 const char *line; /* Line being parsed. */
1767 size_t len; /* Line length. */
1768 int beg, end; /* First and last column of first occurrence. */
1769 int ofs; /* Column offset between repeated occurrences. */
1770 struct ccase *c; /* Case to fill in. */
1771 int verify_id; /* Zero to initialize ID, nonzero to verify it. */
1772 int max_occurs; /* Max number of occurrences to parse. */
1775 /* Parses one record of repeated data and outputs corresponding
1776 cases. Returns number of occurrences parsed up to the
1777 maximum specified in INFO. */
1779 rpd_parse_record (const struct rpd_parse_info *info)
1781 struct repeating_data_trns *t = info->trns;
1782 int cur = info->beg;
1785 /* Handle record ID values. */
1788 union value id_temp[MAX_ELEMS_PER_VALUE];
1790 /* Parse record ID into V. */
1794 data_in_finite_line (&di, info->line, info->len, t->id_beg, t->id_end);
1795 di.v = info->verify_id ? id_temp : t->id_value;
1798 di.format = t->id_spec;
1805 && compare_values (id_temp, t->id_value, t->id_var->width) != 0)
1807 char expected_str [MAX_FORMATTED_LEN + 1];
1808 char actual_str [MAX_FORMATTED_LEN + 1];
1810 data_out (expected_str, &t->id_var->print, t->id_value);
1811 expected_str[t->id_var->print.w] = '\0';
1813 data_out (actual_str, &t->id_var->print, id_temp);
1814 actual_str[t->id_var->print.w] = '\0';
1817 _("Encountered mismatched record ID \"%s\" expecting \"%s\"."),
1818 actual_str, expected_str);
1824 /* Iterate over the set of expected occurrences and record each of
1825 them as a separate case. FIXME: We need to execute any
1826 transformations that follow the current one. */
1830 for (occurrences = 0; occurrences < info->max_occurs; )
1832 if (cur + info->ofs > info->end + 1)
1837 struct dls_var_spec *var_spec = t->first;
1839 for (; var_spec; var_spec = var_spec->next)
1841 int fc = var_spec->fc - 1 + cur;
1842 int lc = var_spec->lc - 1 + cur;
1844 if (fc > info->len && !warned && var_spec->input.type != FMT_A)
1849 _("Variable %s starting in column %d extends "
1850 "beyond physical record length of %d."),
1851 var_spec->v->name, fc, info->len);
1857 data_in_finite_line (&di, info->line, info->len, fc, lc);
1858 di.v = case_data_rw (info->c, var_spec->fv);
1861 di.format = var_spec->input;
1871 if (!t->write_case (t->wc_data))
1879 /* Reads one set of repetitions of the elements in the REPEATING
1880 DATA structure. Returns -1 on success, -2 on end of file or
1883 repeating_data_trns_proc (void *trns_, struct ccase *c, int case_num UNUSED)
1885 struct repeating_data_trns *t = trns_;
1887 struct fixed_string line; /* Current record. */
1889 int starts_beg; /* Starting column. */
1890 int starts_end; /* Ending column. */
1891 int occurs; /* Number of repetitions. */
1892 int length; /* Length of each occurrence. */
1893 int cont_beg; /* Starting column for continuation lines. */
1894 int cont_end; /* Ending column for continuation lines. */
1896 int occurs_left; /* Number of occurrences remaining. */
1898 int code; /* Return value from rpd_parse_record(). */
1900 int skip_first_record = 0;
1902 dfm_push (t->reader);
1904 /* Read the current record. */
1905 dfm_reread_record (t->reader, 1);
1906 dfm_expand_tabs (t->reader);
1907 if (dfm_eof (t->reader))
1909 dfm_get_record (t->reader, &line);
1910 dfm_forward_record (t->reader);
1912 /* Calculate occurs, length. */
1913 occurs_left = occurs = realize_value (&t->occurs, c);
1916 tmsg (SE, RPD_ERR, _("Invalid value %d for OCCURS."), occurs);
1919 starts_beg = realize_value (&t->starts_beg, c);
1920 if (starts_beg <= 0)
1922 tmsg (SE, RPD_ERR, _("Beginning column for STARTS (%d) must be "
1927 starts_end = realize_value (&t->starts_end, c);
1928 if (starts_end < starts_beg)
1930 tmsg (SE, RPD_ERR, _("Ending column for STARTS (%d) is less than "
1931 "beginning column (%d)."),
1932 starts_end, starts_beg);
1933 skip_first_record = 1;
1935 length = realize_value (&t->length, c);
1938 tmsg (SE, RPD_ERR, _("Invalid value %d for LENGTH."), length);
1940 occurs = occurs_left = 1;
1942 cont_beg = realize_value (&t->cont_beg, c);
1945 tmsg (SE, RPD_ERR, _("Beginning column for CONTINUED (%d) must be "
1950 cont_end = realize_value (&t->cont_end, c);
1951 if (cont_end < cont_beg)
1953 tmsg (SE, RPD_ERR, _("Ending column for CONTINUED (%d) is less than "
1954 "beginning column (%d)."),
1955 cont_end, cont_beg);
1959 /* Parse the first record. */
1960 if (!skip_first_record)
1962 struct rpd_parse_info info;
1964 info.line = ls_c_str (&line);
1965 info.len = ls_length (&line);
1966 info.beg = starts_beg;
1967 info.end = starts_end;
1971 info.max_occurs = occurs_left;
1972 code = rpd_parse_record (&info);
1975 occurs_left -= code;
1977 else if (cont_beg == 0)
1980 /* Make sure, if some occurrences are left, that we have
1981 continuation records. */
1982 if (occurs_left > 0 && cont_beg == 0)
1985 _("Number of repetitions specified on OCCURS (%d) "
1986 "exceed number of repetitions available in "
1987 "space on STARTS (%d), and CONTINUED not specified."),
1988 occurs, (starts_end - starts_beg + 1) / length);
1992 /* Go on to additional records. */
1993 while (occurs_left != 0)
1995 struct rpd_parse_info info;
1997 assert (occurs_left >= 0);
1999 /* Read in another record. */
2000 if (dfm_eof (t->reader))
2003 _("Unexpected end of file with %d repetitions "
2004 "remaining out of %d."),
2005 occurs_left, occurs);
2008 dfm_expand_tabs (t->reader);
2009 dfm_get_record (t->reader, &line);
2010 dfm_forward_record (t->reader);
2012 /* Parse this record. */
2014 info.line = ls_c_str (&line);
2015 info.len = ls_length (&line);
2016 info.beg = cont_beg;
2017 info.end = cont_end;
2021 info.max_occurs = occurs_left;
2022 code = rpd_parse_record (&info);;
2025 occurs_left -= code;
2028 dfm_pop (t->reader);
2030 /* FIXME: This is a kluge until we've implemented multiplexing of
2035 /* Frees a REPEATING DATA transformation. */
2037 repeating_data_trns_free (void *rpd_)
2039 struct repeating_data_trns *rpd = rpd_;
2041 destroy_dls_var_spec (rpd->first);
2042 dfm_close_reader (rpd->reader);
2043 free (rpd->id_value);
2047 /* Lets repeating_data_trns_proc() know how to write the cases
2048 that it composes. Not elegant. */
2050 repeating_data_set_write_case (struct transformation *trns_,
2051 write_case_func *write_case,
2052 write_case_data wc_data)
2054 struct repeating_data_trns *t = trns_->private;
2056 assert (trns_->proc == repeating_data_trns_proc);
2057 t->write_case = write_case;
2058 t->wc_data = wc_data;