1 /* PSPP - computes sample statistics.
2 Copyright (C) 1997-9, 2000 Free Software Foundation, Inc.
3 Written by Ben Pfaff <blp@gnu.org>.
5 This program is free software; you can redistribute it and/or
6 modify it under the terms of the GNU General Public License as
7 published by the Free Software Foundation; either version 2 of the
8 License, or (at your option) any later version.
10 This program is distributed in the hope that it will be useful, but
11 WITHOUT ANY WARRANTY; without even the implied warranty of
12 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
13 General Public License for more details.
15 You should have received a copy of the GNU General Public License
16 along with this program; if not, write to the Free Software
17 Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA
21 #include "data-list.h"
31 #include "debug-print.h"
33 #include "dictionary.h"
35 #include "file-handle.h"
45 /* Utility function. */
47 /* FIXME: Either REPEATING DATA must be the last transformation, or we
48 must multiplex the transformations that follow (i.e., perform them
49 for every case that we produce from a repetition instance).
50 Currently we do neither. We should do one or the other. */
52 /* Describes how to parse one variable. */
55 struct dls_var_spec *next; /* Next specification in list. */
57 /* Both free and fixed formats. */
58 struct fmt_spec input; /* Input format of this field. */
59 struct variable *v; /* Associated variable. Used only in
60 parsing. Not safe later. */
61 int fv; /* First value in case. */
63 /* Fixed format only. */
64 int rec; /* Record number (1-based). */
65 int fc, lc; /* Column numbers in record. */
67 /* Free format only. */
68 char name[9]; /* Name of variable. */
71 /* Constants for DATA LIST type. */
72 /* Must match table in cmd_data_list(). */
80 /* DATA LIST private data structure. */
85 struct dls_var_spec *first, *last; /* Variable parsing specifications. */
86 struct dfm_reader *reader; /* Data file reader. */
88 int type; /* A DLS_* constant. */
89 struct variable *end; /* Variable specified on END subcommand. */
90 int eof; /* End of file encountered. */
91 int rec_cnt; /* Number of records. */
92 size_t case_size; /* Case size in bytes. */
93 char *delims; /* Delimiters if any; not null-terminated. */
94 size_t delim_cnt; /* Number of delimiter, or 0 for spaces. */
97 static int parse_fixed (struct data_list_pgm *);
98 static int parse_free (struct dls_var_spec **, struct dls_var_spec **);
99 static void dump_fixed_table (const struct dls_var_spec *,
100 const struct file_handle *, int rec_cnt);
101 static void dump_free_table (const struct data_list_pgm *,
102 const struct file_handle *);
103 static void destroy_dls_var_spec (struct dls_var_spec *);
104 static trns_free_func data_list_trns_free;
105 static trns_proc_func data_list_trns_proc;
107 /* Message title for REPEATING DATA. */
108 #define RPD_ERR "REPEATING DATA: "
113 struct data_list_pgm *dls; /* DATA LIST program under construction. */
114 int table = -1; /* Print table if nonzero, -1=undecided. */
115 struct file_handle *fh = NULL; /* File handle of source, NULL=inline file. */
117 if (!case_source_is_complex (vfm_source))
118 discard_variables ();
120 dls = xmalloc (sizeof *dls);
128 dls->first = dls->last = NULL;
132 if (lex_match_id ("FILE"))
138 if (case_source_is_class (vfm_source, &file_type_source_class)
139 && fh != default_handle)
141 msg (SE, _("DATA LIST may not use a different file from "
142 "that specified on its surrounding FILE TYPE."));
146 else if (lex_match_id ("RECORDS"))
150 if (!lex_force_int ())
152 dls->rec_cnt = lex_integer ();
156 else if (lex_match_id ("END"))
160 msg (SE, _("The END subcommand may only be specified once."));
165 if (!lex_force_id ())
167 dls->end = dict_lookup_var (default_dict, tokid);
169 dls->end = dict_create_var_assert (default_dict, tokid, 0);
172 else if (token == T_ID)
174 if (lex_match_id ("NOTABLE"))
176 else if (lex_match_id ("TABLE"))
181 if (lex_match_id ("FIXED"))
183 else if (lex_match_id ("FREE"))
185 else if (lex_match_id ("LIST"))
195 msg (SE, _("Only one of FIXED, FREE, or LIST may "
201 if ((dls->type == DLS_FREE || dls->type == DLS_LIST)
204 while (!lex_match (')'))
208 if (lex_match_id ("TAB"))
210 else if (token == T_STRING && tokstr.length == 1)
211 delim = tokstr.string[0];
218 dls->delims = xrealloc (dls->delims, dls->delim_cnt + 1);
219 dls->delims[dls->delim_cnt++] = delim;
233 dls->case_size = dict_get_case_size (default_dict);
237 dls->type = DLS_FIXED;
241 if (dls->type == DLS_FREE)
247 if (dls->type == DLS_FIXED)
249 if (!parse_fixed (dls))
252 dump_fixed_table (dls->first, fh, dls->rec_cnt);
256 if (!parse_free (&dls->first, &dls->last))
259 dump_free_table (dls, fh);
262 dls->reader = dfm_open_reader (fh);
263 if (dls->reader == NULL)
266 if (vfm_source != NULL)
268 struct data_list_pgm *new_pgm;
270 dls->h.proc = data_list_trns_proc;
271 dls->h.free = data_list_trns_free;
273 new_pgm = xmalloc (sizeof *new_pgm);
274 memcpy (new_pgm, &dls, sizeof *new_pgm);
275 add_transformation (&new_pgm->h);
278 vfm_source = create_case_source (&data_list_source_class,
284 destroy_dls_var_spec (dls->first);
290 /* Adds SPEC to the linked list with head at FIRST and tail at
293 append_var_spec (struct dls_var_spec **first, struct dls_var_spec **last,
294 struct dls_var_spec *spec)
301 (*last)->next = spec;
305 /* Fixed-format parsing. */
307 /* Used for chaining together fortran-like format specifiers. */
310 struct fmt_list *next;
313 struct fmt_list *down;
316 /* State of parsing DATA LIST. */
317 struct fixed_parsing_state
319 char **name; /* Variable names. */
320 int name_cnt; /* Number of names. */
322 int recno; /* Index of current record. */
323 int sc; /* 1-based column number of starting column for
324 next field to output. */
327 static int fixed_parse_compatible (struct fixed_parsing_state *,
328 struct dls_var_spec **,
329 struct dls_var_spec **);
330 static int fixed_parse_fortran (struct fixed_parsing_state *,
331 struct dls_var_spec **,
332 struct dls_var_spec **);
334 /* Parses all the variable specifications for DATA LIST FIXED,
335 storing them into DLS. Returns nonzero if successful. */
337 parse_fixed (struct data_list_pgm *dls)
339 struct fixed_parsing_state fx;
347 while (lex_match ('/'))
350 if (lex_integer_p ())
352 if (lex_integer () < fx.recno)
354 msg (SE, _("The record number specified, %ld, is "
355 "before the previous record, %d. Data "
356 "fields must be listed in order of "
357 "increasing record number."),
358 lex_integer (), fx.recno - 1);
362 fx.recno = lex_integer ();
368 if (!parse_DATA_LIST_vars (&fx.name, &fx.name_cnt, PV_NONE))
373 if (!fixed_parse_compatible (&fx, &dls->first, &dls->last))
376 else if (token == '(')
378 if (!fixed_parse_fortran (&fx, &dls->first, &dls->last))
383 msg (SE, _("SPSS-like or FORTRAN-like format "
384 "specification expected after variable names."));
388 for (i = 0; i < fx.name_cnt; i++)
392 if (dls->first == NULL)
394 msg (SE, _("At least one variable must be specified."));
397 if (dls->rec_cnt && dls->last->rec > dls->rec_cnt)
399 msg (SE, _("Variables are specified on records that "
400 "should not exist according to RECORDS subcommand."));
403 else if (!dls->rec_cnt)
404 dls->rec_cnt = dls->last->rec;
407 lex_error (_("expecting end of command"));
413 for (i = 0; i < fx.name_cnt; i++)
419 /* Parses a variable specification in the form 1-10 (A) based on
420 FX and adds specifications to the linked list with head at
421 FIRST and tail at LAST. */
423 fixed_parse_compatible (struct fixed_parsing_state *fx,
424 struct dls_var_spec **first, struct dls_var_spec **last)
426 struct fmt_spec input;
432 if (!lex_force_int ())
437 msg (SE, _("Column positions for fields must be positive."));
443 lex_negative_to_dash ();
446 if (!lex_force_int ())
451 msg (SE, _("Column positions for fields must be positive."));
456 msg (SE, _("The ending column for a field must be "
457 "greater than the starting column."));
466 /* Divide columns evenly. */
467 input.w = (lc - fc + 1) / fx->name_cnt;
468 if ((lc - fc + 1) % fx->name_cnt)
470 msg (SE, _("The %d columns %d-%d "
471 "can't be evenly divided into %d fields."),
472 lc - fc + 1, fc, lc, fx->name_cnt);
476 /* Format specifier. */
479 struct fmt_desc *fdp;
485 input.type = parse_format_specifier_name (&cp, 0);
486 if (input.type == -1)
490 msg (SE, _("A format specifier on this line "
491 "has extra characters on the end."));
501 if (lex_integer_p ())
503 if (lex_integer () < 1)
505 msg (SE, _("The value for number of decimal places "
506 "must be at least 1."));
510 input.d = lex_integer ();
516 fdp = &formats[input.type];
517 if (fdp->n_args < 2 && input.d)
519 msg (SE, _("Input format %s doesn't accept decimal places."),
527 if (!lex_force_match (')'))
535 if (!check_input_specifier (&input))
538 /* Start column for next specification. */
541 /* Width of variables to create. */
542 if (input.type == FMT_A || input.type == FMT_AHEX)
547 /* Create variables and var specs. */
548 for (i = 0; i < fx->name_cnt; i++)
550 struct dls_var_spec *spec;
553 v = dict_create_var (default_dict, fx->name[i], width);
556 convert_fmt_ItoO (&input, &v->print);
558 if (!case_source_is_complex (vfm_source))
563 v = dict_lookup_var_assert (default_dict, fx->name[i]);
564 if (vfm_source == NULL)
566 msg (SE, _("%s is a duplicate variable name."), fx->name[i]);
569 if ((width != 0) != (v->width != 0))
571 msg (SE, _("There is already a variable %s of a "
576 if (width != 0 && width != v->width)
578 msg (SE, _("There is already a string variable %s of a "
579 "different width."), fx->name[i]);
584 spec = xmalloc (sizeof *spec);
588 spec->rec = fx->recno;
589 spec->fc = fc + input.w * i;
590 spec->lc = spec->fc + input.w - 1;
591 append_var_spec (first, last, spec);
596 /* Destroy format list F and, if RECURSE is nonzero, all its
599 destroy_fmt_list (struct fmt_list *f, int recurse)
601 struct fmt_list *next;
606 if (recurse && f->f.type == FMT_DESCEND)
607 destroy_fmt_list (f->down, 1);
612 /* Takes a hierarchically structured fmt_list F as constructed by
613 fixed_parse_fortran(), and flattens it, adding the variable
614 specifications to the linked list with head FIRST and tail
615 LAST. NAME_IDX is used to take values from the list of names
616 in FX; it should initially point to a value of 0. */
618 dump_fmt_list (struct fixed_parsing_state *fx, struct fmt_list *f,
619 struct dls_var_spec **first, struct dls_var_spec **last,
624 for (; f; f = f->next)
625 if (f->f.type == FMT_X)
627 else if (f->f.type == FMT_T)
629 else if (f->f.type == FMT_NEWREC)
631 fx->recno += f->count;
635 for (i = 0; i < f->count; i++)
636 if (f->f.type == FMT_DESCEND)
638 if (!dump_fmt_list (fx, f->down, first, last, name_idx))
643 struct dls_var_spec *spec;
647 if (formats[f->f.type].cat & FCAT_STRING)
651 if (*name_idx >= fx->name_cnt)
653 msg (SE, _("The number of format "
654 "specifications exceeds the given number of "
659 v = dict_create_var (default_dict, fx->name[(*name_idx)++], width);
662 msg (SE, _("%s is a duplicate variable name."), fx->name[i]);
666 if (!case_source_is_complex (vfm_source))
669 spec = xmalloc (sizeof *spec);
673 spec->rec = fx->recno;
675 spec->lc = fx->sc + f->f.w - 1;
676 append_var_spec (first, last, spec);
678 convert_fmt_ItoO (&spec->input, &v->print);
686 /* Recursively parses a FORTRAN-like format specification into
687 the linked list with head FIRST and tail TAIL. LEVEL is the
688 level of recursion, starting from 0. Returns the parsed
689 specification if successful, or a null pointer on failure. */
690 static struct fmt_list *
691 fixed_parse_fortran_internal (struct fixed_parsing_state *fx,
692 struct dls_var_spec **first,
693 struct dls_var_spec **last)
695 struct fmt_list *head = NULL;
696 struct fmt_list *tail = NULL;
698 lex_force_match ('(');
702 struct fmt_list *new = xmalloc (sizeof *new);
705 /* Append new to list. */
713 if (lex_integer_p ())
715 new->count = lex_integer ();
721 /* Parse format specifier. */
724 new->f.type = FMT_DESCEND;
725 new->down = fixed_parse_fortran_internal (fx, first, last);
726 if (new->down == NULL)
729 else if (lex_match ('/'))
730 new->f.type = FMT_NEWREC;
731 else if (!parse_format_specifier (&new->f, 1)
732 || !check_input_specifier (&new->f))
737 lex_force_match (')');
742 destroy_fmt_list (head, 0);
747 /* Parses a FORTRAN-like format specification into the linked
748 list with head FIRST and tail LAST. Returns nonzero if
751 fixed_parse_fortran (struct fixed_parsing_state *fx,
752 struct dls_var_spec **first, struct dls_var_spec **last)
754 struct fmt_list *list;
757 list = fixed_parse_fortran_internal (fx, first, last);
762 dump_fmt_list (fx, list, first, last, &name_idx);
763 destroy_fmt_list (list, 1);
764 if (name_idx < fx->name_cnt)
766 msg (SE, _("There aren't enough format specifications "
767 "to match the number of variable names given."));
774 /* Displays a table giving information on fixed-format variable
775 parsing on DATA LIST. */
776 /* FIXME: The `Columns' column should be divided into three columns,
777 one for the starting column, one for the dash, one for the ending
778 column; then right-justify the starting column and left-justify the
781 dump_fixed_table (const struct dls_var_spec *specs,
782 const struct file_handle *fh, int rec_cnt)
784 const struct dls_var_spec *spec;
788 for (i = 0, spec = specs; spec; spec = spec->next)
790 t = tab_create (4, i + 1, 0);
791 tab_columns (t, TAB_COL_DOWN, 1);
792 tab_headers (t, 0, 0, 1, 0);
793 tab_text (t, 0, 0, TAB_CENTER | TAT_TITLE, _("Variable"));
794 tab_text (t, 1, 0, TAB_CENTER | TAT_TITLE, _("Record"));
795 tab_text (t, 2, 0, TAB_CENTER | TAT_TITLE, _("Columns"));
796 tab_text (t, 3, 0, TAB_CENTER | TAT_TITLE, _("Format"));
797 tab_box (t, TAL_1, TAL_1, TAL_0, TAL_1, 0, 0, 3, i);
798 tab_hline (t, TAL_2, 0, 3, 1);
799 tab_dim (t, tab_natural_dimensions);
801 for (i = 1, spec = specs; spec; spec = spec->next, i++)
803 tab_text (t, 0, i, TAB_LEFT, spec->v->name);
804 tab_text (t, 1, i, TAT_PRINTF, "%d", spec->rec);
805 tab_text (t, 2, i, TAT_PRINTF, "%3d-%3d",
807 tab_text (t, 3, i, TAB_LEFT | TAT_FIX,
808 fmt_to_string (&spec->input));
812 tab_title (t, 1, ngettext ("Reading %d record from file %s.",
813 "Reading %d records from file %s.", rec_cnt),
814 rec_cnt, handle_get_filename (fh));
816 tab_title (t, 1, ngettext ("Reading %d record from the command file.",
817 "Reading %d records from the command file.",
823 /* Free-format parsing. */
825 /* Parses variable specifications for DATA LIST FREE and adds
826 them to the linked list with head FIRST and tail LAST.
827 Returns nonzero only if successful. */
829 parse_free (struct dls_var_spec **first, struct dls_var_spec **last)
834 struct fmt_spec input, output;
840 if (!parse_DATA_LIST_vars (&name, &name_cnt, PV_NONE))
844 if (!parse_format_specifier (&input, 0)
845 || !check_input_specifier (&input)
846 || !lex_force_match (')'))
848 for (i = 0; i < name_cnt; i++)
853 convert_fmt_ItoO (&input, &output);
861 output = get_format();
864 if (input.type == FMT_A || input.type == FMT_AHEX)
868 for (i = 0; i < name_cnt; i++)
870 struct dls_var_spec *spec;
873 v = dict_create_var (default_dict, name[i], width);
876 msg (SE, _("%s is a duplicate variable name."), name[i]);
879 v->print = v->write = output;
881 if (!case_source_is_complex (vfm_source))
884 spec = xmalloc (sizeof *spec);
888 strcpy (spec->name, name[i]);
889 append_var_spec (first, last, spec);
891 for (i = 0; i < name_cnt; i++)
897 lex_error (_("expecting end of command"));
901 /* Displays a table giving information on free-format variable parsing
904 dump_free_table (const struct data_list_pgm *dls,
905 const struct file_handle *fh)
911 struct dls_var_spec *spec;
912 for (i = 0, spec = dls->first; spec; spec = spec->next)
916 t = tab_create (2, i + 1, 0);
917 tab_columns (t, TAB_COL_DOWN, 1);
918 tab_headers (t, 0, 0, 1, 0);
919 tab_text (t, 0, 0, TAB_CENTER | TAT_TITLE, _("Variable"));
920 tab_text (t, 1, 0, TAB_CENTER | TAT_TITLE, _("Format"));
921 tab_box (t, TAL_1, TAL_1, TAL_0, TAL_1, 0, 0, 1, i);
922 tab_hline (t, TAL_2, 0, 1, 1);
923 tab_dim (t, tab_natural_dimensions);
926 struct dls_var_spec *spec;
928 for (i = 1, spec = dls->first; spec; spec = spec->next, i++)
930 tab_text (t, 0, i, TAB_LEFT, spec->v->name);
931 tab_text (t, 1, i, TAB_LEFT | TAT_FIX, fmt_to_string (&spec->input));
936 tab_title (t, 1, _("Reading free-form data from file %s."),
937 handle_get_filename (fh));
939 tab_title (t, 1, _("Reading free-form data from the command file."));
944 /* Input procedure. */
946 /* Extracts a field from the current position in the current
947 record. Fields can be unquoted or quoted with single- or
948 double-quote characters. *FIELD is set to the field content.
949 After parsing the field, sets the current position in the
950 record to just past the field and any trailing delimiter.
951 END_BLANK is used internally; it should be initialized by the
952 caller to 0 and left alone afterward. Returns 0 on failure or
953 a 1-based column number indicating the beginning of the field
956 cut_field (const struct data_list_pgm *dls, struct len_string *field,
959 struct len_string line;
963 if (dfm_eof (dls->reader))
965 if (dls->delim_cnt == 0)
966 dfm_expand_tabs (dls->reader);
967 dfm_get_record (dls->reader, &line);
969 cp = ls_c_str (&line);
970 if (dls->delim_cnt == 0)
972 /* Skip leading whitespace. */
973 while (cp < ls_end (&line) && isspace ((unsigned char) *cp))
975 if (cp >= ls_end (&line))
978 /* Handle actual data, whether quoted or unquoted. */
979 if (*cp == '\'' || *cp == '"')
983 field->string = ++cp;
984 while (cp < ls_end (&line) && *cp != quote)
986 field->length = cp - field->string;
987 if (cp < ls_end (&line))
990 msg (SW, _("Quoted string missing terminating `%c'."), quote);
995 while (cp < ls_end (&line)
996 && !isspace ((unsigned char) *cp) && *cp != ',')
998 field->length = cp - field->string;
1001 /* Skip trailing whitespace and a single comma if present. */
1002 while (cp < ls_end (&line) && isspace ((unsigned char) *cp))
1004 if (cp < ls_end (&line) && *cp == ',')
1009 if (cp >= ls_end (&line))
1011 int column = dfm_column_start (dls->reader);
1012 /* A blank line or a line that ends in \t has a
1013 trailing blank field. */
1014 if (column == 1 || (column > 1 && cp[-1] == '\t'))
1016 if (*end_blank == 0)
1019 field->string = ls_end (&line);
1021 dfm_forward_record (dls->reader);
1036 while (cp < ls_end (&line)
1037 && memchr (dls->delims, *cp, dls->delim_cnt) == NULL)
1039 field->length = cp - field->string;
1040 if (cp < ls_end (&line))
1045 dfm_forward_columns (dls->reader, field->string - line.string);
1046 column_start = dfm_column_start (dls->reader);
1048 dfm_forward_columns (dls->reader, cp - field->string);
1050 return column_start;
1053 typedef int data_list_read_func (const struct data_list_pgm *, struct ccase *);
1054 static data_list_read_func read_from_data_list_fixed;
1055 static data_list_read_func read_from_data_list_free;
1056 static data_list_read_func read_from_data_list_list;
1058 /* Returns the proper function to read the kind of DATA LIST
1059 data specified by DLS. */
1060 static data_list_read_func *
1061 get_data_list_read_func (const struct data_list_pgm *dls)
1066 return read_from_data_list_fixed;
1069 return read_from_data_list_free;
1072 return read_from_data_list_list;
1080 /* Reads a case from the data file into C, parsing it according
1081 to fixed-format syntax rules in DLS. Returns -1 on success,
1082 -2 at end of file. */
1084 read_from_data_list_fixed (const struct data_list_pgm *dls,
1087 struct dls_var_spec *var_spec = dls->first;
1090 if (dfm_eof (dls->reader))
1092 for (i = 1; i <= dls->rec_cnt; i++)
1094 struct len_string line;
1096 if (dfm_eof (dls->reader))
1098 /* Note that this can't occur on the first record. */
1099 msg (SW, _("Partial case of %d of %d records discarded."),
1100 i - 1, dls->rec_cnt);
1103 dfm_expand_tabs (dls->reader);
1104 dfm_get_record (dls->reader, &line);
1106 for (; var_spec && i == var_spec->rec; var_spec = var_spec->next)
1110 data_in_finite_line (&di, ls_c_str (&line), ls_length (&line),
1111 var_spec->fc, var_spec->lc);
1112 di.v = case_data_rw (c, var_spec->fv);
1114 di.f1 = var_spec->fc;
1115 di.format = var_spec->input;
1120 dfm_forward_record (dls->reader);
1126 /* Reads a case from the data file into C, parsing it according
1127 to free-format syntax rules in DLS. Returns -1 on success,
1128 -2 at end of file. */
1130 read_from_data_list_free (const struct data_list_pgm *dls,
1133 struct dls_var_spec *var_spec;
1136 for (var_spec = dls->first; var_spec; var_spec = var_spec->next)
1138 struct len_string field;
1141 /* Cut out a field and read in a new record if necessary. */
1144 column = cut_field (dls, &field, &end_blank);
1148 if (!dfm_eof (dls->reader))
1149 dfm_forward_record (dls->reader);
1150 if (dfm_eof (dls->reader))
1152 if (var_spec != dls->first)
1153 msg (SW, _("Partial case discarded. The first variable "
1154 "missing was %s."), var_spec->name);
1162 di.s = ls_c_str (&field);
1163 di.e = ls_end (&field);
1164 di.v = case_data_rw (c, var_spec->fv);
1167 di.format = var_spec->input;
1174 /* Reads a case from the data file and parses it according to
1175 list-format syntax rules. Returns -1 on success, -2 at end of
1178 read_from_data_list_list (const struct data_list_pgm *dls,
1181 struct dls_var_spec *var_spec;
1184 if (dfm_eof (dls->reader))
1187 for (var_spec = dls->first; var_spec; var_spec = var_spec->next)
1189 struct len_string field;
1192 /* Cut out a field and check for end-of-line. */
1193 column = cut_field (dls, &field, &end_blank);
1196 if (get_undefined ())
1197 msg (SW, _("Missing value(s) for all variables from %s onward. "
1198 "These will be filled with the system-missing value "
1199 "or blanks, as appropriate."),
1201 for (; var_spec; var_spec = var_spec->next)
1203 int width = get_format_var_width (&var_spec->input);
1205 case_data_rw (c, var_spec->fv)->f = SYSMIS;
1207 memset (case_data_rw (c, var_spec->fv)->s, ' ', width);
1215 di.s = ls_c_str (&field);
1216 di.e = ls_end (&field);
1217 di.v = case_data_rw (c, var_spec->fv);
1220 di.format = var_spec->input;
1225 dfm_forward_record (dls->reader);
1229 /* Destroys SPEC. */
1231 destroy_dls_var_spec (struct dls_var_spec *spec)
1233 struct dls_var_spec *next;
1235 while (spec != NULL)
1243 /* Destroys DATA LIST transformation PGM. */
1245 data_list_trns_free (struct trns_header *pgm)
1247 struct data_list_pgm *dls = (struct data_list_pgm *) pgm;
1249 destroy_dls_var_spec (dls->first);
1250 dfm_close_reader (dls->reader);
1254 /* Handle DATA LIST transformation T, parsing data into C. */
1256 data_list_trns_proc (struct trns_header *t, struct ccase *c,
1257 int case_num UNUSED)
1259 struct data_list_pgm *dls = (struct data_list_pgm *) t;
1260 data_list_read_func *read_func;
1263 dfm_push (dls->reader);
1265 read_func = get_data_list_read_func (dls);
1266 retval = read_func (dls, c);
1268 /* Handle end of file. */
1271 /* If we already encountered end of file then this is an
1275 msg (SE, _("Attempt to read past end of file."));
1277 dfm_pop (dls->reader);
1281 /* Otherwise simply note it. */
1287 /* If there was an END subcommand handle it. */
1288 if (dls->end != NULL)
1292 case_data_rw (c, dls->end->fv)->f = 1.0;
1296 case_data_rw (c, dls->end->fv)->f = 0.0;
1299 dfm_pop (dls->reader);
1304 /* Reads all the records from the data file and passes them to
1307 data_list_source_read (struct case_source *source,
1309 write_case_func *write_case, write_case_data wc_data)
1311 struct data_list_pgm *dls = source->aux;
1312 data_list_read_func *read_func = get_data_list_read_func (dls);
1314 dfm_push (dls->reader);
1315 while (read_func (dls, c) != -2)
1316 if (!write_case (wc_data))
1318 dfm_pop (dls->reader);
1321 /* Destroys the source's internal data. */
1323 data_list_source_destroy (struct case_source *source)
1325 data_list_trns_free (source->aux);
1328 const struct case_source_class data_list_source_class =
1332 data_list_source_read,
1333 data_list_source_destroy,
1336 /* REPEATING DATA. */
1338 /* Represents a number or a variable. */
1339 struct rpd_num_or_var
1341 int num; /* Value, or 0. */
1342 struct variable *var; /* Variable, if number==0. */
1345 /* REPEATING DATA private data structure. */
1346 struct repeating_data_trns
1348 struct trns_header h;
1349 struct dls_var_spec *first, *last; /* Variable parsing specifications. */
1350 struct dfm_reader *reader; /* Input file, never NULL. */
1352 struct rpd_num_or_var starts_beg; /* STARTS=, before the dash. */
1353 struct rpd_num_or_var starts_end; /* STARTS=, after the dash. */
1354 struct rpd_num_or_var occurs; /* OCCURS= subcommand. */
1355 struct rpd_num_or_var length; /* LENGTH= subcommand. */
1356 struct rpd_num_or_var cont_beg; /* CONTINUED=, before the dash. */
1357 struct rpd_num_or_var cont_end; /* CONTINUED=, after the dash. */
1359 /* ID subcommand. */
1360 int id_beg, id_end; /* Beginning & end columns. */
1361 struct variable *id_var; /* DATA LIST variable. */
1362 struct fmt_spec id_spec; /* Input format spec. */
1363 union value *id_value; /* ID value. */
1365 write_case_func *write_case;
1366 write_case_data wc_data;
1369 static trns_free_func repeating_data_trns_free;
1370 static int parse_num_or_var (struct rpd_num_or_var *, const char *);
1371 static int parse_repeating_data (struct dls_var_spec **,
1372 struct dls_var_spec **);
1373 static void find_variable_input_spec (struct variable *v,
1374 struct fmt_spec *spec);
1376 /* Parses the REPEATING DATA command. */
1378 cmd_repeating_data (void)
1380 struct repeating_data_trns *rpd;
1381 int table = 1; /* Print table? */
1382 unsigned seen = 0; /* Mark subcommands as already seen. */
1383 struct file_handle *const fh = default_handle;
1385 assert (case_source_is_complex (vfm_source));
1387 rpd = xmalloc (sizeof *rpd);
1388 rpd->reader = dfm_open_reader (default_handle);
1389 rpd->first = rpd->last = NULL;
1390 rpd->starts_beg.num = 0;
1391 rpd->starts_beg.var = NULL;
1392 rpd->starts_end = rpd->occurs = rpd->length = rpd->cont_beg
1393 = rpd->cont_end = rpd->starts_beg;
1394 rpd->id_beg = rpd->id_end = 0;
1396 rpd->id_value = NULL;
1402 if (lex_match_id ("FILE"))
1404 struct file_handle *file;
1411 msg (SE, _("REPEATING DATA must use the same file as its "
1412 "corresponding DATA LIST or FILE TYPE."));
1416 else if (lex_match_id ("STARTS"))
1421 msg (SE, _("%s subcommand given multiple times."),"STARTS");
1426 if (!parse_num_or_var (&rpd->starts_beg, "STARTS beginning column"))
1429 lex_negative_to_dash ();
1430 if (lex_match ('-'))
1432 if (!parse_num_or_var (&rpd->starts_end, "STARTS ending column"))
1435 /* Otherwise, rpd->starts_end is left uninitialized.
1436 This is okay. We will initialize it later from the
1437 record length of the file. We can't do this now
1438 because we can't be sure that the user has specified
1439 the file handle yet. */
1442 if (rpd->starts_beg.num != 0 && rpd->starts_end.num != 0
1443 && rpd->starts_beg.num > rpd->starts_end.num)
1445 msg (SE, _("STARTS beginning column (%d) exceeds "
1446 "STARTS ending column (%d)."),
1447 rpd->starts_beg.num, rpd->starts_end.num);
1451 else if (lex_match_id ("OCCURS"))
1456 msg (SE, _("%s subcommand given multiple times."),"OCCURS");
1461 if (!parse_num_or_var (&rpd->occurs, "OCCURS"))
1464 else if (lex_match_id ("LENGTH"))
1469 msg (SE, _("%s subcommand given multiple times."),"LENGTH");
1474 if (!parse_num_or_var (&rpd->length, "LENGTH"))
1477 else if (lex_match_id ("CONTINUED"))
1482 msg (SE, _("%s subcommand given multiple times."),"CONTINUED");
1487 if (!lex_match ('/'))
1489 if (!parse_num_or_var (&rpd->cont_beg, "CONTINUED beginning column"))
1492 lex_negative_to_dash ();
1494 && !parse_num_or_var (&rpd->cont_end,
1495 "CONTINUED ending column"))
1498 if (rpd->cont_beg.num != 0 && rpd->cont_end.num != 0
1499 && rpd->cont_beg.num > rpd->cont_end.num)
1501 msg (SE, _("CONTINUED beginning column (%d) exceeds "
1502 "CONTINUED ending column (%d)."),
1503 rpd->cont_beg.num, rpd->cont_end.num);
1508 rpd->cont_beg.num = 1;
1510 else if (lex_match_id ("ID"))
1515 msg (SE, _("%s subcommand given multiple times."),"ID");
1520 if (!lex_force_int ())
1522 if (lex_integer () < 1)
1524 msg (SE, _("ID beginning column (%ld) must be positive."),
1528 rpd->id_beg = lex_integer ();
1531 lex_negative_to_dash ();
1533 if (lex_match ('-'))
1535 if (!lex_force_int ())
1537 if (lex_integer () < 1)
1539 msg (SE, _("ID ending column (%ld) must be positive."),
1543 if (lex_integer () < rpd->id_end)
1545 msg (SE, _("ID ending column (%ld) cannot be less than "
1546 "ID beginning column (%d)."),
1547 lex_integer (), rpd->id_beg);
1551 rpd->id_end = lex_integer ();
1554 else rpd->id_end = rpd->id_beg;
1556 if (!lex_force_match ('='))
1558 rpd->id_var = parse_variable ();
1559 if (rpd->id_var == NULL)
1562 find_variable_input_spec (rpd->id_var, &rpd->id_spec);
1563 rpd->id_value = xmalloc (sizeof *rpd->id_value * rpd->id_var->nv);
1565 else if (lex_match_id ("TABLE"))
1567 else if (lex_match_id ("NOTABLE"))
1569 else if (lex_match_id ("DATA"))
1577 if (!lex_force_match ('/'))
1581 /* Comes here when DATA specification encountered. */
1582 if ((seen & (1 | 2)) != (1 | 2))
1584 if ((seen & 1) == 0)
1585 msg (SE, _("Missing required specification STARTS."));
1586 if ((seen & 2) == 0)
1587 msg (SE, _("Missing required specification OCCURS."));
1591 /* Enforce ID restriction. */
1592 if ((seen & 16) && !(seen & 8))
1594 msg (SE, _("ID specified without CONTINUED."));
1598 /* Calculate starts_end, cont_end if necessary. */
1599 if (rpd->starts_end.num == 0 && rpd->starts_end.var == NULL)
1600 rpd->starts_end.num = handle_get_record_width (fh);
1601 if (rpd->cont_end.num == 0 && rpd->starts_end.var == NULL)
1602 rpd->cont_end.num = handle_get_record_width (fh);
1604 /* Calculate length if possible. */
1605 if ((seen & 4) == 0)
1607 struct dls_var_spec *iter;
1609 for (iter = rpd->first; iter; iter = iter->next)
1611 if (iter->lc > rpd->length.num)
1612 rpd->length.num = iter->lc;
1614 assert (rpd->length.num != 0);
1618 if (!parse_repeating_data (&rpd->first, &rpd->last))
1622 dump_fixed_table (rpd->first, fh, rpd->last->rec);
1625 struct repeating_data_trns *new_trns;
1627 rpd->h.proc = repeating_data_trns_proc;
1628 rpd->h.free = repeating_data_trns_free;
1630 new_trns = xmalloc (sizeof *new_trns);
1631 memcpy (new_trns, &rpd, sizeof *new_trns);
1632 add_transformation ((struct trns_header *) new_trns);
1635 return lex_end_of_command ();
1638 destroy_dls_var_spec (rpd->first);
1639 free (rpd->id_value);
1643 /* Finds the input format specification for variable V and puts
1644 it in SPEC. Because of the way that DATA LIST is structured,
1645 this is nontrivial. */
1647 find_variable_input_spec (struct variable *v, struct fmt_spec *spec)
1651 for (i = 0; i < n_trns; i++)
1653 struct data_list_pgm *pgm = (struct data_list_pgm *) t_trns[i];
1655 if (pgm->h.proc == data_list_trns_proc)
1657 struct dls_var_spec *iter;
1659 for (iter = pgm->first; iter; iter = iter->next)
1662 *spec = iter->input;
1671 /* Parses a number or a variable name from the syntax file and puts
1672 the results in VALUE. Ensures that the number is at least 1; else
1673 emits an error based on MESSAGE. Returns nonzero only if
1676 parse_num_or_var (struct rpd_num_or_var *value, const char *message)
1681 value->var = parse_variable ();
1682 if (value->var == NULL)
1684 if (value->var->type == ALPHA)
1686 msg (SE, _("String variable not allowed here."));
1690 else if (lex_integer_p ())
1692 value->num = lex_integer ();
1696 msg (SE, _("%s (%d) must be at least 1."), message, value->num);
1702 msg (SE, _("Variable or integer expected for %s."), message);
1708 /* Parses data specifications for repeating data groups, adding
1709 them to the linked list with head FIRST and tail LAST.
1710 Returns nonzero only if successful. */
1712 parse_repeating_data (struct dls_var_spec **first, struct dls_var_spec **last)
1714 struct fixed_parsing_state fx;
1720 while (token != '.')
1722 if (!parse_DATA_LIST_vars (&fx.name, &fx.name_cnt, PV_NONE))
1727 if (!fixed_parse_compatible (&fx, first, last))
1730 else if (token == '(')
1732 if (!fixed_parse_fortran (&fx, first, last))
1737 msg (SE, _("SPSS-like or FORTRAN-like format "
1738 "specification expected after variable names."));
1742 for (i = 0; i < fx.name_cnt; i++)
1748 lex_error (_("expecting end of command"));
1755 for (i = 0; i < fx.name_cnt; i++)
1761 /* Obtains the real value for rpd_num_or_var N in case C and returns
1762 it. The valid range is nonnegative numbers, but numbers outside
1763 this range can be returned and should be handled by the caller as
1766 realize_value (struct rpd_num_or_var *n, struct ccase *c)
1771 assert (n->num == 0);
1774 double v = case_num (c, n->var->fv);
1776 if (v == SYSMIS || v <= INT_MIN || v >= INT_MAX)
1785 /* Parameter record passed to rpd_parse_record(). */
1786 struct rpd_parse_info
1788 struct repeating_data_trns *trns; /* REPEATING DATA transformation. */
1789 const char *line; /* Line being parsed. */
1790 size_t len; /* Line length. */
1791 int beg, end; /* First and last column of first occurrence. */
1792 int ofs; /* Column offset between repeated occurrences. */
1793 struct ccase *c; /* Case to fill in. */
1794 int verify_id; /* Zero to initialize ID, nonzero to verify it. */
1795 int max_occurs; /* Max number of occurrences to parse. */
1798 /* Parses one record of repeated data and outputs corresponding
1799 cases. Returns number of occurrences parsed up to the
1800 maximum specified in INFO. */
1802 rpd_parse_record (const struct rpd_parse_info *info)
1804 struct repeating_data_trns *t = info->trns;
1805 int cur = info->beg;
1808 /* Handle record ID values. */
1811 union value id_temp[MAX_ELEMS_PER_VALUE];
1813 /* Parse record ID into V. */
1817 data_in_finite_line (&di, info->line, info->len, t->id_beg, t->id_end);
1818 di.v = info->verify_id ? id_temp : t->id_value;
1821 di.format = t->id_spec;
1828 && compare_values (id_temp, t->id_value, t->id_var->width) != 0)
1830 char expected_str [MAX_FORMATTED_LEN + 1];
1831 char actual_str [MAX_FORMATTED_LEN + 1];
1833 data_out (expected_str, &t->id_var->print, t->id_value);
1834 expected_str[t->id_var->print.w] = '\0';
1836 data_out (actual_str, &t->id_var->print, id_temp);
1837 actual_str[t->id_var->print.w] = '\0';
1840 _("Encountered mismatched record ID \"%s\" expecting \"%s\"."),
1841 actual_str, expected_str);
1847 /* Iterate over the set of expected occurrences and record each of
1848 them as a separate case. FIXME: We need to execute any
1849 transformations that follow the current one. */
1853 for (occurrences = 0; occurrences < info->max_occurs; )
1855 if (cur + info->ofs > info->end + 1)
1860 struct dls_var_spec *var_spec = t->first;
1862 for (; var_spec; var_spec = var_spec->next)
1864 int fc = var_spec->fc - 1 + cur;
1865 int lc = var_spec->lc - 1 + cur;
1867 if (fc > info->len && !warned && var_spec->input.type != FMT_A)
1872 _("Variable %s starting in column %d extends "
1873 "beyond physical record length of %d."),
1874 var_spec->v->name, fc, info->len);
1880 data_in_finite_line (&di, info->line, info->len, fc, lc);
1881 di.v = case_data_rw (info->c, var_spec->fv);
1884 di.format = var_spec->input;
1894 if (!t->write_case (t->wc_data))
1902 /* Reads one set of repetitions of the elements in the REPEATING
1903 DATA structure. Returns -1 on success, -2 on end of file or
1906 repeating_data_trns_proc (struct trns_header *trns, struct ccase *c,
1907 int case_num UNUSED)
1909 struct repeating_data_trns *t = (struct repeating_data_trns *) trns;
1911 struct len_string line; /* Current record. */
1913 int starts_beg; /* Starting column. */
1914 int starts_end; /* Ending column. */
1915 int occurs; /* Number of repetitions. */
1916 int length; /* Length of each occurrence. */
1917 int cont_beg; /* Starting column for continuation lines. */
1918 int cont_end; /* Ending column for continuation lines. */
1920 int occurs_left; /* Number of occurrences remaining. */
1922 int code; /* Return value from rpd_parse_record(). */
1924 int skip_first_record = 0;
1926 dfm_push (t->reader);
1928 /* Read the current record. */
1929 dfm_reread_record (t->reader, 1);
1930 dfm_expand_tabs (t->reader);
1931 if (dfm_eof (t->reader))
1933 dfm_get_record (t->reader, &line);
1934 dfm_forward_record (t->reader);
1936 /* Calculate occurs, length. */
1937 occurs_left = occurs = realize_value (&t->occurs, c);
1940 tmsg (SE, RPD_ERR, _("Invalid value %d for OCCURS."), occurs);
1943 starts_beg = realize_value (&t->starts_beg, c);
1944 if (starts_beg <= 0)
1946 tmsg (SE, RPD_ERR, _("Beginning column for STARTS (%d) must be "
1951 starts_end = realize_value (&t->starts_end, c);
1952 if (starts_end < starts_beg)
1954 tmsg (SE, RPD_ERR, _("Ending column for STARTS (%d) is less than "
1955 "beginning column (%d)."),
1956 starts_end, starts_beg);
1957 skip_first_record = 1;
1959 length = realize_value (&t->length, c);
1962 tmsg (SE, RPD_ERR, _("Invalid value %d for LENGTH."), length);
1964 occurs = occurs_left = 1;
1966 cont_beg = realize_value (&t->cont_beg, c);
1969 tmsg (SE, RPD_ERR, _("Beginning column for CONTINUED (%d) must be "
1974 cont_end = realize_value (&t->cont_end, c);
1975 if (cont_end < cont_beg)
1977 tmsg (SE, RPD_ERR, _("Ending column for CONTINUED (%d) is less than "
1978 "beginning column (%d)."),
1979 cont_end, cont_beg);
1983 /* Parse the first record. */
1984 if (!skip_first_record)
1986 struct rpd_parse_info info;
1988 info.line = ls_c_str (&line);
1989 info.len = ls_length (&line);
1990 info.beg = starts_beg;
1991 info.end = starts_end;
1995 info.max_occurs = occurs_left;
1996 code = rpd_parse_record (&info);
1999 occurs_left -= code;
2001 else if (cont_beg == 0)
2004 /* Make sure, if some occurrences are left, that we have
2005 continuation records. */
2006 if (occurs_left > 0 && cont_beg == 0)
2009 _("Number of repetitions specified on OCCURS (%d) "
2010 "exceed number of repetitions available in "
2011 "space on STARTS (%d), and CONTINUED not specified."),
2012 occurs, (starts_end - starts_beg + 1) / length);
2016 /* Go on to additional records. */
2017 while (occurs_left != 0)
2019 struct rpd_parse_info info;
2021 assert (occurs_left >= 0);
2023 /* Read in another record. */
2024 if (dfm_eof (t->reader))
2027 _("Unexpected end of file with %d repetitions "
2028 "remaining out of %d."),
2029 occurs_left, occurs);
2032 dfm_expand_tabs (t->reader);
2033 dfm_get_record (t->reader, &line);
2034 dfm_forward_record (t->reader);
2036 /* Parse this record. */
2038 info.line = ls_c_str (&line);
2039 info.len = ls_length (&line);
2040 info.beg = cont_beg;
2041 info.end = cont_end;
2045 info.max_occurs = occurs_left;
2046 code = rpd_parse_record (&info);;
2049 occurs_left -= code;
2052 dfm_pop (t->reader);
2054 /* FIXME: This is a kluge until we've implemented multiplexing of
2059 /* Frees a REPEATING DATA transformation. */
2061 repeating_data_trns_free (struct trns_header *rpd_)
2063 struct repeating_data_trns *rpd = (struct repeating_data_trns *) rpd_;
2065 destroy_dls_var_spec (rpd->first);
2066 dfm_close_reader (rpd->reader);
2067 free (rpd->id_value);
2070 /* Lets repeating_data_trns_proc() know how to write the cases
2071 that it composes. Not elegant. */
2073 repeating_data_set_write_case (struct trns_header *trns,
2074 write_case_func *write_case,
2075 write_case_data wc_data)
2077 struct repeating_data_trns *t = (struct repeating_data_trns *) trns;
2079 assert (trns->proc == repeating_data_trns_proc);
2080 t->write_case = write_case;
2081 t->wc_data = wc_data;