1 /* PSPP - computes sample statistics.
2 Copyright (C) 1997-9, 2000 Free Software Foundation, Inc.
3 Written by Ben Pfaff <blp@gnu.org>.
5 This program is free software; you can redistribute it and/or
6 modify it under the terms of the GNU General Public License as
7 published by the Free Software Foundation; either version 2 of the
8 License, or (at your option) any later version.
10 This program is distributed in the hope that it will be useful, but
11 WITHOUT ANY WARRANTY; without even the implied warranty of
12 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
13 General Public License for more details.
15 You should have received a copy of the GNU General Public License
16 along with this program; if not, write to the Free Software
17 Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA
21 #include "data-list.h"
30 #include "debug-print.h"
33 #include "file-handle.h"
43 /* Utility function. */
45 /* FIXME: Either REPEATING DATA must be the last transformation, or we
46 must multiplex the transformations that follow (i.e., perform them
47 for every case that we produce from a repetition instance).
48 Currently we do neither. We should do one or the other. */
50 /* Describes how to parse one variable. */
53 struct dls_var_spec *next; /* Next specification in list. */
55 /* Both free and fixed formats. */
56 struct fmt_spec input; /* Input format of this field. */
57 struct variable *v; /* Associated variable. Used only in
58 parsing. Not safe later. */
59 int fv; /* First value in case. */
61 /* Fixed format only. */
62 int rec; /* Record number (1-based). */
63 int fc, lc; /* Column numbers in record. */
65 /* Free format only. */
66 char name[9]; /* Name of variable. */
69 /* Constants for DATA LIST type. */
70 /* Must match table in cmd_data_list(). */
78 /* DATA LIST private data structure. */
83 struct dls_var_spec *first, *last; /* Variable parsing specifications. */
84 struct file_handle *handle; /* Input file, never NULL. */
86 int type; /* A DLS_* constant. */
87 struct variable *end; /* Variable specified on END subcommand. */
88 int eof; /* End of file encountered. */
89 int nrec; /* Number of records. */
90 size_t case_size; /* Case size in bytes. */
91 int delim; /* Specified delimeter */
94 static int parse_fixed (struct data_list_pgm *);
95 static int parse_free (struct dls_var_spec **, struct dls_var_spec **);
96 static void dump_fixed_table (const struct dls_var_spec *specs,
97 const struct file_handle *handle, int nrec);
98 static void dump_free_table (const struct data_list_pgm *);
99 static void destroy_dls_var_spec (struct dls_var_spec *);
100 static trns_free_func data_list_trns_free;
101 static trns_proc_func data_list_trns_proc;
103 /* Message title for REPEATING DATA. */
104 #define RPD_ERR "REPEATING DATA: "
109 /* DATA LIST program under construction. */
110 struct data_list_pgm *dls;
112 /* 0=print no table, 1=print table. (TABLE subcommand.) */
115 if (!case_source_is_complex (vfm_source))
116 discard_variables ();
118 dls = xmalloc (sizeof *dls);
119 dls->handle = default_handle;
125 dls->first = dls->last = NULL;
129 if (lex_match_id ("FILE"))
132 dls->handle = fh_parse_file_handle ();
135 if (case_source_is_class (vfm_source, &file_type_source_class)
136 && dls->handle != default_handle)
138 msg (SE, _("DATA LIST may not use a different file from "
139 "that specified on its surrounding FILE TYPE."));
143 else if (lex_match_id ("RECORDS"))
147 if (!lex_force_int ())
149 dls->nrec = lex_integer ();
153 else if (lex_match_id ("END"))
157 msg (SE, _("The END subcommand may only be specified once."));
162 if (!lex_force_id ())
164 dls->end = dict_lookup_var (default_dict, tokid);
166 dls->end = dict_create_var_assert (default_dict, tokid, 0);
169 else if (token == T_ID)
171 /* Must match DLS_* constants. */
172 static const char *id[] = {"FIXED", "FREE", "LIST", "NOTABLE",
177 for (p = id; *p; p++)
178 if (lex_id_match (*p, tokid))
193 msg (SE, _("Only one of FIXED, FREE, or LIST may "
203 else if (token=='(') {
205 if (lex_match_id ("TAB")) {
217 dls->case_size = dict_get_case_size (default_dict);
218 default_handle = dls->handle;
221 dls->type = DLS_FIXED;
225 if (dls->type == DLS_FREE)
231 if (dls->type == DLS_FIXED)
233 if (!parse_fixed (dls))
236 dump_fixed_table (dls->first, dls->handle, dls->nrec);
240 if (!parse_free (&dls->first, &dls->last))
243 dump_free_table (dls);
246 if (!dfm_open_for_reading (dls->handle))
249 if (vfm_source != NULL)
251 struct data_list_pgm *new_pgm;
253 dls->h.proc = data_list_trns_proc;
254 dls->h.free = data_list_trns_free;
256 new_pgm = xmalloc (sizeof *new_pgm);
257 memcpy (new_pgm, &dls, sizeof *new_pgm);
258 add_transformation (&new_pgm->h);
261 vfm_source = create_case_source (&data_list_source_class,
267 destroy_dls_var_spec (dls->first);
272 /* Adds SPEC to the linked list with head at FIRST and tail at
275 append_var_spec (struct dls_var_spec **first, struct dls_var_spec **last,
276 struct dls_var_spec *spec)
283 (*last)->next = spec;
287 /* Fixed-format parsing. */
289 /* Used for chaining together fortran-like format specifiers. */
292 struct fmt_list *next;
295 struct fmt_list *down;
298 /* State of parsing DATA LIST. */
299 struct fixed_parsing_state
301 char **name; /* Variable names. */
302 int name_cnt; /* Number of names. */
304 int recno; /* Index of current record. */
305 int sc; /* 1-based column number of starting column for
306 next field to output. */
309 static int fixed_parse_compatible (struct fixed_parsing_state *,
310 struct dls_var_spec **,
311 struct dls_var_spec **);
312 static int fixed_parse_fortran (struct fixed_parsing_state *,
313 struct dls_var_spec **,
314 struct dls_var_spec **);
316 /* Parses all the variable specifications for DATA LIST FIXED,
317 storing them into DLS. Returns nonzero if successful. */
319 parse_fixed (struct data_list_pgm *dls)
321 struct fixed_parsing_state fx;
329 while (lex_match ('/'))
332 if (lex_integer_p ())
334 if (lex_integer () < fx.recno)
336 msg (SE, _("The record number specified, %ld, is "
337 "before the previous record, %d. Data "
338 "fields must be listed in order of "
339 "increasing record number."),
340 lex_integer (), fx.recno - 1);
344 fx.recno = lex_integer ();
350 if (!parse_DATA_LIST_vars (&fx.name, &fx.name_cnt, PV_NONE))
355 if (!fixed_parse_compatible (&fx, &dls->first, &dls->last))
358 else if (token == '(')
360 if (!fixed_parse_fortran (&fx, &dls->first, &dls->last))
365 msg (SE, _("SPSS-like or FORTRAN-like format "
366 "specification expected after variable names."));
370 for (i = 0; i < fx.name_cnt; i++)
374 if (dls->first == NULL)
376 msg (SE, _("At least one variable must be specified."));
379 if (dls->nrec && dls->last->rec > dls->nrec)
381 msg (SE, _("Variables are specified on records that "
382 "should not exist according to RECORDS subcommand."));
386 dls->nrec = dls->last->rec;
389 lex_error (_("expecting end of command"));
395 for (i = 0; i < fx.name_cnt; i++)
401 /* Parses a variable specification in the form 1-10 (A) based on
402 FX and adds specifications to the linked list with head at
403 FIRST and tail at LAST. */
405 fixed_parse_compatible (struct fixed_parsing_state *fx,
406 struct dls_var_spec **first, struct dls_var_spec **last)
408 struct fmt_spec input;
414 if (!lex_force_int ())
419 msg (SE, _("Column positions for fields must be positive."));
425 lex_negative_to_dash ();
428 if (!lex_force_int ())
433 msg (SE, _("Column positions for fields must be positive."));
438 msg (SE, _("The ending column for a field must be "
439 "greater than the starting column."));
448 /* Divide columns evenly. */
449 input.w = (lc - fc + 1) / fx->name_cnt;
450 if ((lc - fc + 1) % fx->name_cnt)
452 msg (SE, _("The %d columns %d-%d "
453 "can't be evenly divided into %d fields."),
454 lc - fc + 1, fc, lc, fx->name_cnt);
458 /* Format specifier. */
461 struct fmt_desc *fdp;
467 input.type = parse_format_specifier_name (&cp, 0);
468 if (input.type == -1)
472 msg (SE, _("A format specifier on this line "
473 "has extra characters on the end."));
483 if (lex_integer_p ())
485 if (lex_integer () < 1)
487 msg (SE, _("The value for number of decimal places "
488 "must be at least 1."));
492 input.d = lex_integer ();
498 fdp = &formats[input.type];
499 if (fdp->n_args < 2 && input.d)
501 msg (SE, _("Input format %s doesn't accept decimal places."),
509 if (!lex_force_match (')'))
517 if (!check_input_specifier (&input))
520 /* Start column for next specification. */
523 /* Width of variables to create. */
524 if (input.type == FMT_A || input.type == FMT_AHEX)
529 /* Create variables and var specs. */
530 for (i = 0; i < fx->name_cnt; i++)
532 struct dls_var_spec *spec;
535 v = dict_create_var (default_dict, fx->name[i], width);
538 convert_fmt_ItoO (&input, &v->print);
540 if (!case_source_is_complex (vfm_source))
545 v = dict_lookup_var_assert (default_dict, fx->name[i]);
546 if (vfm_source == NULL)
548 msg (SE, _("%s is a duplicate variable name."), fx->name[i]);
551 if ((width != 0) != (v->width != 0))
553 msg (SE, _("There is already a variable %s of a "
558 if (width != 0 && width != v->width)
560 msg (SE, _("There is already a string variable %s of a "
561 "different width."), fx->name[i]);
566 spec = xmalloc (sizeof *spec);
570 spec->rec = fx->recno;
571 spec->fc = fc + input.w * i;
572 spec->lc = spec->fc + input.w - 1;
573 append_var_spec (first, last, spec);
578 /* Destroy format list F and, if RECURSE is nonzero, all its
581 destroy_fmt_list (struct fmt_list *f, int recurse)
583 struct fmt_list *next;
588 if (recurse && f->f.type == FMT_DESCEND)
589 destroy_fmt_list (f->down, 1);
594 /* Takes a hierarchically structured fmt_list F as constructed by
595 fixed_parse_fortran(), and flattens it, adding the variable
596 specifications to the linked list with head FIRST and tail
597 LAST. NAME_IDX is used to take values from the list of names
598 in FX; it should initially point to a value of 0. */
600 dump_fmt_list (struct fixed_parsing_state *fx, struct fmt_list *f,
601 struct dls_var_spec **first, struct dls_var_spec **last,
606 for (; f; f = f->next)
607 if (f->f.type == FMT_X)
609 else if (f->f.type == FMT_T)
611 else if (f->f.type == FMT_NEWREC)
613 fx->recno += f->count;
617 for (i = 0; i < f->count; i++)
618 if (f->f.type == FMT_DESCEND)
620 if (!dump_fmt_list (fx, f->down, first, last, name_idx))
625 struct dls_var_spec *spec;
629 if (formats[f->f.type].cat & FCAT_STRING)
633 if (*name_idx >= fx->name_cnt)
635 msg (SE, _("The number of format "
636 "specifications exceeds the given number of "
641 v = dict_create_var (default_dict, fx->name[(*name_idx)++], width);
644 msg (SE, _("%s is a duplicate variable name."), fx->name[i]);
648 if (!case_source_is_complex (vfm_source))
651 spec = xmalloc (sizeof *spec);
655 spec->rec = fx->recno;
657 spec->lc = fx->sc + f->f.w - 1;
658 append_var_spec (first, last, spec);
660 convert_fmt_ItoO (&spec->input, &v->print);
668 /* Recursively parses a FORTRAN-like format specification into
669 the linked list with head FIRST and tail TAIL. LEVEL is the
670 level of recursion, starting from 0. Returns the parsed
671 specification if successful, or a null pointer on failure. */
672 static struct fmt_list *
673 fixed_parse_fortran_internal (struct fixed_parsing_state *fx,
674 struct dls_var_spec **first,
675 struct dls_var_spec **last)
677 struct fmt_list *head = NULL;
678 struct fmt_list *tail = NULL;
680 lex_force_match ('(');
684 struct fmt_list *new = xmalloc (sizeof *new);
687 /* Append new to list. */
695 if (lex_integer_p ())
697 new->count = lex_integer ();
703 /* Parse format specifier. */
706 new->f.type = FMT_DESCEND;
707 new->down = fixed_parse_fortran_internal (fx, first, last);
708 if (new->down == NULL)
711 else if (lex_match ('/'))
712 new->f.type = FMT_NEWREC;
713 else if (!parse_format_specifier (&new->f, 1)
714 || !check_input_specifier (&new->f))
719 lex_force_match (')');
724 destroy_fmt_list (head, 0);
729 /* Parses a FORTRAN-like format specification into the linked
730 list with head FIRST and tail LAST. Returns nonzero if
733 fixed_parse_fortran (struct fixed_parsing_state *fx,
734 struct dls_var_spec **first, struct dls_var_spec **last)
736 struct fmt_list *list;
739 list = fixed_parse_fortran_internal (fx, first, last);
744 dump_fmt_list (fx, list, first, last, &name_idx);
745 destroy_fmt_list (list, 1);
746 if (name_idx < fx->name_cnt)
748 msg (SE, _("There aren't enough format specifications "
749 "to match the number of variable names given."));
756 /* Displays a table giving information on fixed-format variable
757 parsing on DATA LIST. */
758 /* FIXME: The `Columns' column should be divided into three columns,
759 one for the starting column, one for the dash, one for the ending
760 column; then right-justify the starting column and left-justify the
763 dump_fixed_table (const struct dls_var_spec *specs,
764 const struct file_handle *handle, int nrec)
766 const struct dls_var_spec *spec;
769 const char *filename;
772 for (i = 0, spec = specs; spec; spec = spec->next)
774 t = tab_create (4, i + 1, 0);
775 tab_columns (t, TAB_COL_DOWN, 1);
776 tab_headers (t, 0, 0, 1, 0);
777 tab_text (t, 0, 0, TAB_CENTER | TAT_TITLE, _("Variable"));
778 tab_text (t, 1, 0, TAB_CENTER | TAT_TITLE, _("Record"));
779 tab_text (t, 2, 0, TAB_CENTER | TAT_TITLE, _("Columns"));
780 tab_text (t, 3, 0, TAB_CENTER | TAT_TITLE, _("Format"));
781 tab_box (t, TAL_1, TAL_1, TAL_0, TAL_1, 0, 0, 3, i);
782 tab_hline (t, TAL_2, 0, 3, 1);
783 tab_dim (t, tab_natural_dimensions);
785 for (i = 1, spec = specs; spec; spec = spec->next, i++)
787 tab_text (t, 0, i, TAB_LEFT, spec->v->name);
788 tab_text (t, 1, i, TAT_PRINTF, "%d", spec->rec);
789 tab_text (t, 2, i, TAT_PRINTF, "%3d-%3d",
791 tab_text (t, 3, i, TAB_LEFT | TAT_FIX,
792 fmt_to_string (&spec->input));
795 filename = handle_get_filename (handle);
796 if (filename == NULL)
798 buf = local_alloc (strlen (filename) + INT_DIGITS + 80);
799 sprintf (buf, (handle != inline_file
800 ? ngettext ("Reading %d record from file %s.",
801 "Reading %d records from file %s.", nrec)
802 : ngettext ("Reading %d record from the command file.",
803 "Reading %d records from the command file.",
807 tab_title (t, 0, buf);
812 /* Free-format parsing. */
814 /* Parses variable specifications for DATA LIST FREE and adds
815 them to the linked list with head FIRST and tail LAST.
816 Returns nonzero only if successful. */
818 parse_free (struct dls_var_spec **first, struct dls_var_spec **last)
823 struct fmt_spec input, output;
829 if (!parse_DATA_LIST_vars (&name, &name_cnt, PV_NONE))
833 if (!parse_format_specifier (&input, 0)
834 || !check_input_specifier (&input)
835 || !lex_force_match (')'))
837 for (i = 0; i < name_cnt; i++)
842 convert_fmt_ItoO (&input, &output);
850 output = get_format();
853 if (input.type == FMT_A || input.type == FMT_AHEX)
857 for (i = 0; i < name_cnt; i++)
859 struct dls_var_spec *spec;
862 v = dict_create_var (default_dict, name[i], width);
865 msg (SE, _("%s is a duplicate variable name."), name[i]);
868 v->print = v->write = output;
870 if (!case_source_is_complex (vfm_source))
873 spec = xmalloc (sizeof *spec);
877 strcpy (spec->name, name[i]);
878 append_var_spec (first, last, spec);
880 for (i = 0; i < name_cnt; i++)
886 lex_error (_("expecting end of command"));
890 /* Displays a table giving information on free-format variable parsing
893 dump_free_table (const struct data_list_pgm *dls)
899 struct dls_var_spec *spec;
900 for (i = 0, spec = dls->first; spec; spec = spec->next)
904 t = tab_create (2, i + 1, 0);
905 tab_columns (t, TAB_COL_DOWN, 1);
906 tab_headers (t, 0, 0, 1, 0);
907 tab_text (t, 0, 0, TAB_CENTER | TAT_TITLE, _("Variable"));
908 tab_text (t, 1, 0, TAB_CENTER | TAT_TITLE, _("Format"));
909 tab_box (t, TAL_1, TAL_1, TAL_0, TAL_1, 0, 0, 1, i);
910 tab_hline (t, TAL_2, 0, 1, 1);
911 tab_dim (t, tab_natural_dimensions);
914 struct dls_var_spec *spec;
916 for (i = 1, spec = dls->first; spec; spec = spec->next, i++)
918 tab_text (t, 0, i, TAB_LEFT, spec->v->name);
919 tab_text (t, 1, i, TAB_LEFT | TAT_FIX, fmt_to_string (&spec->input));
924 const char *filename;
926 filename = handle_get_filename (dls->handle);
927 if (filename == NULL)
930 (dls->handle != inline_file
931 ? _("Reading free-form data from file %s.")
932 : _("Reading free-form data from the command file.")),
939 /* Input procedure. */
941 /* Extracts a field from the current position in the current record.
942 Fields can be unquoted or quoted with single- or double-quote
943 characters. *RET_LEN is set to the field length, *RET_CP is set to
944 the field itself. After parsing the field, sets the current
945 position in the record to just past the field. Returns 0 on
946 failure or a 1-based column number indicating the beginning of the
949 cut_field (const struct data_list_pgm *dls, char **ret_cp, int *ret_len)
954 cp = dfm_get_record (dls->handle, &len);
959 if (dls->delim != 0) {
960 if (*cp==dls->delim) {
965 /* Skip leading whitespace and commas. */
966 while ((isspace ((unsigned char) *cp) || *cp == ',') && cp < ep)
972 /* Three types of fields: quoted with ', quoted with ", unquoted. */
973 /* Quoting does not escape the effects of delimiters for explicitly */
974 /* specified delims */
975 /* (consistency with SPSS doco: */
976 /* For data with explicitly specified value delimiters (for example, */
977 /* DATA LIST FREE (","): */
978 /* - Multiple delimiters without any intervening space can be used */
979 /* to specify missing data. */
980 /* - The specified delimiters cannot occur within a data value, even */
981 /* if you enclose the value in quotation marks or apostrophes. */
982 if (dls->delim==0 && (*cp == '\'' || *cp == '"'))
987 while (cp < ep && *cp != quote)
990 while(cp<ep && *cp!=dls->delim) {
994 *ret_len = cp - *ret_cp;
998 msg (SW, _("Scope of string exceeds line."));
1003 if (dls->delim!=0) {
1004 while(cp<ep && *cp!=dls->delim) {
1009 while (cp < ep && !isspace ((unsigned char) *cp) && *cp != ',')
1012 *ret_len = cp - *ret_cp;
1016 int beginning_column;
1018 dfm_set_record (dls->handle, *ret_cp);
1019 beginning_column = dfm_get_cur_col (dls->handle) + 1;
1021 dfm_set_record (dls->handle, cp);
1023 return beginning_column;
1027 typedef int data_list_read_func (const struct data_list_pgm *, struct ccase *);
1028 static data_list_read_func read_from_data_list_fixed;
1029 static data_list_read_func read_from_data_list_free;
1030 static data_list_read_func read_from_data_list_list;
1032 /* Returns the proper function to read the kind of DATA LIST
1033 data specified by DLS. */
1034 static data_list_read_func *
1035 get_data_list_read_func (const struct data_list_pgm *dls)
1040 return read_from_data_list_fixed;
1043 return read_from_data_list_free;
1046 return read_from_data_list_list;
1054 /* Reads a case from the data file into C, parsing it according
1055 to fixed-format syntax rules in DLS. Returns -1 on success,
1056 -2 at end of file. */
1058 read_from_data_list_fixed (const struct data_list_pgm *dls,
1061 struct dls_var_spec *var_spec = dls->first;
1064 if (!dfm_get_record (dls->handle, NULL))
1066 for (i = 1; i <= dls->nrec; i++)
1069 char *line = dfm_get_record (dls->handle, &len);
1073 /* Note that this can't occur on the first record. */
1074 msg (SW, _("Partial case of %d of %d records discarded."),
1079 for (; var_spec && i == var_spec->rec; var_spec = var_spec->next)
1083 data_in_finite_line (&di, line, len, var_spec->fc, var_spec->lc);
1084 di.v = &c->data[var_spec->fv];
1086 di.f1 = var_spec->fc;
1087 di.format = var_spec->input;
1092 dfm_fwd_record (dls->handle);
1098 /* Reads a case from the data file into C, parsing it according
1099 to free-format syntax rules in DLS. Returns -1 on success,
1100 -2 at end of file. */
1102 read_from_data_list_free (const struct data_list_pgm *dls,
1105 struct dls_var_spec *var_spec;
1109 for (var_spec = dls->first; var_spec; var_spec = var_spec->next)
1113 /* Cut out a field and read in a new record if necessary. */
1116 column = cut_field (dls, &field, &len);
1120 if (dfm_get_record (dls->handle, NULL))
1121 dfm_fwd_record (dls->handle);
1122 if (!dfm_get_record (dls->handle, NULL))
1124 if (var_spec != dls->first)
1125 msg (SW, _("Partial case discarded. The first variable "
1126 "missing was %s."), var_spec->name);
1136 di.v = &c->data[var_spec->fv];
1139 di.format = var_spec->input;
1146 /* Reads a case from the data file and parses it according to
1147 list-format syntax rules. Returns -1 on success, -2 at end of
1150 read_from_data_list_list (const struct data_list_pgm *dls,
1153 struct dls_var_spec *var_spec;
1157 if (!dfm_get_record (dls->handle, NULL))
1160 for (var_spec = dls->first; var_spec; var_spec = var_spec->next)
1162 /* Cut out a field and check for end-of-line. */
1163 int column = cut_field (dls, &field, &len);
1167 if (get_undefined() )
1168 msg (SW, _("Missing value(s) for all variables from %s onward. "
1169 "These will be filled with the system-missing value "
1170 "or blanks, as appropriate."),
1172 for (; var_spec; var_spec = var_spec->next)
1174 int width = get_format_var_width (&var_spec->input);
1176 c->data[var_spec->fv].f = SYSMIS;
1178 memset (c->data[var_spec->fv].s, ' ', width);
1188 di.v = &c->data[var_spec->fv];
1191 di.format = var_spec->input;
1196 dfm_fwd_record (dls->handle);
1200 /* Destroys SPEC. */
1202 destroy_dls_var_spec (struct dls_var_spec *spec)
1204 struct dls_var_spec *next;
1206 while (spec != NULL)
1214 /* Destroys DATA LIST transformation PGM. */
1216 data_list_trns_free (struct trns_header *pgm)
1218 struct data_list_pgm *dls = (struct data_list_pgm *) pgm;
1219 destroy_dls_var_spec (dls->first);
1220 fh_close_handle (dls->handle);
1224 /* Handle DATA LIST transformation T, parsing data into C. */
1226 data_list_trns_proc (struct trns_header *t, struct ccase *c,
1227 int case_num UNUSED)
1229 struct data_list_pgm *dls = (struct data_list_pgm *) t;
1230 data_list_read_func *read_func;
1233 dfm_push (dls->handle);
1235 read_func = get_data_list_read_func (dls);
1236 retval = read_func (dls, c);
1238 /* Handle end of file. */
1241 /* If we already encountered end of file then this is an
1245 msg (SE, _("Attempt to read past end of file."));
1247 dfm_pop (dls->handle);
1251 /* Otherwise simply note it. */
1257 /* If there was an END subcommand handle it. */
1258 if (dls->end != NULL)
1262 c->data[dls->end->fv].f = 1.0;
1266 c->data[dls->end->fv].f = 0.0;
1269 dfm_pop (dls->handle);
1274 /* Reads all the records from the data file and passes them to
1277 data_list_source_read (struct case_source *source,
1279 write_case_func *write_case, write_case_data wc_data)
1281 struct data_list_pgm *dls = source->aux;
1282 data_list_read_func *read_func = get_data_list_read_func (dls);
1284 dfm_push (dls->handle);
1285 while (read_func (dls, c) != -2)
1286 if (!write_case (wc_data))
1288 dfm_pop (dls->handle);
1290 fh_close_handle (dls->handle);
1293 /* Destroys the source's internal data. */
1295 data_list_source_destroy (struct case_source *source)
1297 data_list_trns_free (source->aux);
1300 const struct case_source_class data_list_source_class =
1304 data_list_source_read,
1305 data_list_source_destroy,
1308 /* REPEATING DATA. */
1310 /* Represents a number or a variable. */
1311 struct rpd_num_or_var
1313 int num; /* Value, or 0. */
1314 struct variable *var; /* Variable, if number==0. */
1317 /* REPEATING DATA private data structure. */
1318 struct repeating_data_trns
1320 struct trns_header h;
1321 struct dls_var_spec *first, *last; /* Variable parsing specifications. */
1322 struct file_handle *handle; /* Input file, never NULL. */
1324 struct rpd_num_or_var starts_beg; /* STARTS=, before the dash. */
1325 struct rpd_num_or_var starts_end; /* STARTS=, after the dash. */
1326 struct rpd_num_or_var occurs; /* OCCURS= subcommand. */
1327 struct rpd_num_or_var length; /* LENGTH= subcommand. */
1328 struct rpd_num_or_var cont_beg; /* CONTINUED=, before the dash. */
1329 struct rpd_num_or_var cont_end; /* CONTINUED=, after the dash. */
1331 /* ID subcommand. */
1332 int id_beg, id_end; /* Beginning & end columns. */
1333 struct variable *id_var; /* DATA LIST variable. */
1334 struct fmt_spec id_spec; /* Input format spec. */
1335 union value *id_value; /* ID value. */
1337 write_case_func *write_case;
1338 write_case_data wc_data;
1341 static trns_free_func repeating_data_trns_free;
1342 static int parse_num_or_var (struct rpd_num_or_var *, const char *);
1343 static int parse_repeating_data (struct dls_var_spec **,
1344 struct dls_var_spec **);
1345 static void find_variable_input_spec (struct variable *v,
1346 struct fmt_spec *spec);
1348 /* Parses the REPEATING DATA command. */
1350 cmd_repeating_data (void)
1352 struct repeating_data_trns *rpd;
1354 /* 0=print no table, 1=print table. (TABLE subcommand.) */
1357 /* Bits are set when a particular subcommand has been seen. */
1360 assert (case_source_is_complex (vfm_source));
1362 rpd = xmalloc (sizeof *rpd);
1363 rpd->handle = default_handle;
1364 rpd->first = rpd->last = NULL;
1365 rpd->starts_beg.num = 0;
1366 rpd->starts_beg.var = NULL;
1367 rpd->starts_end = rpd->occurs = rpd->length = rpd->cont_beg
1368 = rpd->cont_end = rpd->starts_beg;
1369 rpd->id_beg = rpd->id_end = 0;
1371 rpd->id_value = NULL;
1377 if (lex_match_id ("FILE"))
1380 rpd->handle = fh_parse_file_handle ();
1383 if (rpd->handle != default_handle)
1385 msg (SE, _("REPEATING DATA must use the same file as its "
1386 "corresponding DATA LIST or FILE TYPE."));
1390 else if (lex_match_id ("STARTS"))
1395 msg (SE, _("%s subcommand given multiple times."),"STARTS");
1400 if (!parse_num_or_var (&rpd->starts_beg, "STARTS beginning column"))
1403 lex_negative_to_dash ();
1404 if (lex_match ('-'))
1406 if (!parse_num_or_var (&rpd->starts_end, "STARTS ending column"))
1409 /* Otherwise, rpd->starts_end is left uninitialized.
1410 This is okay. We will initialize it later from the
1411 record length of the file. We can't do this now
1412 because we can't be sure that the user has specified
1413 the file handle yet. */
1416 if (rpd->starts_beg.num != 0 && rpd->starts_end.num != 0
1417 && rpd->starts_beg.num > rpd->starts_end.num)
1419 msg (SE, _("STARTS beginning column (%d) exceeds "
1420 "STARTS ending column (%d)."),
1421 rpd->starts_beg.num, rpd->starts_end.num);
1425 else if (lex_match_id ("OCCURS"))
1430 msg (SE, _("%s subcommand given multiple times."),"OCCURS");
1435 if (!parse_num_or_var (&rpd->occurs, "OCCURS"))
1438 else if (lex_match_id ("LENGTH"))
1443 msg (SE, _("%s subcommand given multiple times."),"LENGTH");
1448 if (!parse_num_or_var (&rpd->length, "LENGTH"))
1451 else if (lex_match_id ("CONTINUED"))
1456 msg (SE, _("%s subcommand given multiple times."),"CONTINUED");
1461 if (!lex_match ('/'))
1463 if (!parse_num_or_var (&rpd->cont_beg, "CONTINUED beginning column"))
1466 lex_negative_to_dash ();
1468 && !parse_num_or_var (&rpd->cont_end,
1469 "CONTINUED ending column"))
1472 if (rpd->cont_beg.num != 0 && rpd->cont_end.num != 0
1473 && rpd->cont_beg.num > rpd->cont_end.num)
1475 msg (SE, _("CONTINUED beginning column (%d) exceeds "
1476 "CONTINUED ending column (%d)."),
1477 rpd->cont_beg.num, rpd->cont_end.num);
1482 rpd->cont_beg.num = 1;
1484 else if (lex_match_id ("ID"))
1489 msg (SE, _("%s subcommand given multiple times."),"ID");
1494 if (!lex_force_int ())
1496 if (lex_integer () < 1)
1498 msg (SE, _("ID beginning column (%ld) must be positive."),
1502 rpd->id_beg = lex_integer ();
1505 lex_negative_to_dash ();
1507 if (lex_match ('-'))
1509 if (!lex_force_int ())
1511 if (lex_integer () < 1)
1513 msg (SE, _("ID ending column (%ld) must be positive."),
1517 if (lex_integer () < rpd->id_end)
1519 msg (SE, _("ID ending column (%ld) cannot be less than "
1520 "ID beginning column (%d)."),
1521 lex_integer (), rpd->id_beg);
1525 rpd->id_end = lex_integer ();
1528 else rpd->id_end = rpd->id_beg;
1530 if (!lex_force_match ('='))
1532 rpd->id_var = parse_variable ();
1533 if (rpd->id_var == NULL)
1536 find_variable_input_spec (rpd->id_var, &rpd->id_spec);
1537 rpd->id_value = xmalloc (sizeof *rpd->id_value * rpd->id_var->nv);
1539 else if (lex_match_id ("TABLE"))
1541 else if (lex_match_id ("NOTABLE"))
1543 else if (lex_match_id ("DATA"))
1551 if (!lex_force_match ('/'))
1555 /* Comes here when DATA specification encountered. */
1556 if ((seen & (1 | 2)) != (1 | 2))
1558 if ((seen & 1) == 0)
1559 msg (SE, _("Missing required specification STARTS."));
1560 if ((seen & 2) == 0)
1561 msg (SE, _("Missing required specification OCCURS."));
1565 /* Enforce ID restriction. */
1566 if ((seen & 16) && !(seen & 8))
1568 msg (SE, _("ID specified without CONTINUED."));
1572 /* Calculate starts_end, cont_end if necessary. */
1573 if (rpd->starts_end.num == 0 && rpd->starts_end.var == NULL)
1574 rpd->starts_end.num = handle_get_record_width (rpd->handle);
1575 if (rpd->cont_end.num == 0 && rpd->starts_end.var == NULL)
1576 rpd->cont_end.num = handle_get_record_width (rpd->handle);
1578 /* Calculate length if possible. */
1579 if ((seen & 4) == 0)
1581 struct dls_var_spec *iter;
1583 for (iter = rpd->first; iter; iter = iter->next)
1585 if (iter->lc > rpd->length.num)
1586 rpd->length.num = iter->lc;
1588 assert (rpd->length.num != 0);
1592 if (!parse_repeating_data (&rpd->first, &rpd->last))
1596 dump_fixed_table (rpd->first, rpd->handle, rpd->last->rec);
1599 struct repeating_data_trns *new_trns;
1601 rpd->h.proc = repeating_data_trns_proc;
1602 rpd->h.free = repeating_data_trns_free;
1604 new_trns = xmalloc (sizeof *new_trns);
1605 memcpy (new_trns, &rpd, sizeof *new_trns);
1606 add_transformation ((struct trns_header *) new_trns);
1609 return lex_end_of_command ();
1612 destroy_dls_var_spec (rpd->first);
1613 free (rpd->id_value);
1617 /* Finds the input format specification for variable V and puts
1618 it in SPEC. Because of the way that DATA LIST is structured,
1619 this is nontrivial. */
1621 find_variable_input_spec (struct variable *v, struct fmt_spec *spec)
1625 for (i = 0; i < n_trns; i++)
1627 struct data_list_pgm *pgm = (struct data_list_pgm *) t_trns[i];
1629 if (pgm->h.proc == data_list_trns_proc)
1631 struct dls_var_spec *iter;
1633 for (iter = pgm->first; iter; iter = iter->next)
1636 *spec = iter->input;
1645 /* Parses a number or a variable name from the syntax file and puts
1646 the results in VALUE. Ensures that the number is at least 1; else
1647 emits an error based on MESSAGE. Returns nonzero only if
1650 parse_num_or_var (struct rpd_num_or_var *value, const char *message)
1655 value->var = parse_variable ();
1656 if (value->var == NULL)
1658 if (value->var->type == ALPHA)
1660 msg (SE, _("String variable not allowed here."));
1664 else if (lex_integer_p ())
1666 value->num = lex_integer ();
1670 msg (SE, _("%s (%d) must be at least 1."), message, value->num);
1676 msg (SE, _("Variable or integer expected for %s."), message);
1682 /* Parses data specifications for repeating data groups, adding
1683 them to the linked list with head FIRST and tail LAST.
1684 Returns nonzero only if successful. */
1686 parse_repeating_data (struct dls_var_spec **first, struct dls_var_spec **last)
1688 struct fixed_parsing_state fx;
1694 while (token != '.')
1696 if (!parse_DATA_LIST_vars (&fx.name, &fx.name_cnt, PV_NONE))
1701 if (!fixed_parse_compatible (&fx, first, last))
1704 else if (token == '(')
1706 if (!fixed_parse_fortran (&fx, first, last))
1711 msg (SE, _("SPSS-like or FORTRAN-like format "
1712 "specification expected after variable names."));
1716 for (i = 0; i < fx.name_cnt; i++)
1722 lex_error (_("expecting end of command"));
1729 for (i = 0; i < fx.name_cnt; i++)
1735 /* Obtains the real value for rpd_num_or_var N in case C and returns
1736 it. The valid range is nonnegative numbers, but numbers outside
1737 this range can be returned and should be handled by the caller as
1740 realize_value (struct rpd_num_or_var *n, struct ccase *c)
1745 assert (n->num == 0);
1748 double v = c->data[n->var->fv].f;
1750 if (v == SYSMIS || v <= INT_MIN || v >= INT_MAX)
1759 /* Parameter record passed to rpd_parse_record(). */
1760 struct rpd_parse_info
1762 struct repeating_data_trns *trns; /* REPEATING DATA transformation. */
1763 const char *line; /* Line being parsed. */
1764 size_t len; /* Line length. */
1765 int beg, end; /* First and last column of first occurrence. */
1766 int ofs; /* Column offset between repeated occurrences. */
1767 struct ccase *c; /* Case to fill in. */
1768 int verify_id; /* Zero to initialize ID, nonzero to verify it. */
1769 int max_occurs; /* Max number of occurrences to parse. */
1772 /* Parses one record of repeated data and outputs corresponding
1773 cases. Returns number of occurrences parsed up to the
1774 maximum specified in INFO. */
1776 rpd_parse_record (const struct rpd_parse_info *info)
1778 struct repeating_data_trns *t = info->trns;
1779 int cur = info->beg;
1782 /* Handle record ID values. */
1785 union value id_temp[MAX_ELEMS_PER_VALUE];
1787 /* Parse record ID into V. */
1791 data_in_finite_line (&di, info->line, info->len, t->id_beg, t->id_end);
1792 di.v = info->verify_id ? id_temp : t->id_value;
1795 di.format = t->id_spec;
1802 && compare_values (id_temp, t->id_value, t->id_var->width) != 0)
1804 char expected_str [MAX_FORMATTED_LEN + 1];
1805 char actual_str [MAX_FORMATTED_LEN + 1];
1807 data_out (expected_str, &t->id_var->print, t->id_value);
1808 expected_str[t->id_var->print.w] = '\0';
1810 data_out (actual_str, &t->id_var->print, id_temp);
1811 actual_str[t->id_var->print.w] = '\0';
1814 _("Encountered mismatched record ID \"%s\" expecting \"%s\"."),
1815 actual_str, expected_str);
1821 /* Iterate over the set of expected occurrences and record each of
1822 them as a separate case. FIXME: We need to execute any
1823 transformations that follow the current one. */
1827 for (occurrences = 0; occurrences < info->max_occurs; )
1829 if (cur + info->ofs > info->end + 1)
1834 struct dls_var_spec *var_spec = t->first;
1836 for (; var_spec; var_spec = var_spec->next)
1838 int fc = var_spec->fc - 1 + cur;
1839 int lc = var_spec->lc - 1 + cur;
1841 if (fc > info->len && !warned && var_spec->input.type != FMT_A)
1846 _("Variable %s starting in column %d extends "
1847 "beyond physical record length of %d."),
1848 var_spec->v->name, fc, info->len);
1854 data_in_finite_line (&di, info->line, info->len, fc, lc);
1855 di.v = &info->c->data[var_spec->fv];
1858 di.format = var_spec->input;
1868 if (!t->write_case (t->wc_data))
1876 /* Reads one set of repetitions of the elements in the REPEATING
1877 DATA structure. Returns -1 on success, -2 on end of file or
1880 repeating_data_trns_proc (struct trns_header *trns, struct ccase *c,
1881 int case_num UNUSED)
1883 struct repeating_data_trns *t = (struct repeating_data_trns *) trns;
1885 char *line; /* Current record. */
1886 int len; /* Length of current record. */
1888 int starts_beg; /* Starting column. */
1889 int starts_end; /* Ending column. */
1890 int occurs; /* Number of repetitions. */
1891 int length; /* Length of each occurrence. */
1892 int cont_beg; /* Starting column for continuation lines. */
1893 int cont_end; /* Ending column for continuation lines. */
1895 int occurs_left; /* Number of occurrences remaining. */
1897 int code; /* Return value from rpd_parse_record(). */
1899 int skip_first_record = 0;
1901 dfm_push (t->handle);
1903 /* Read the current record. */
1904 dfm_bkwd_record (t->handle, 1);
1905 line = dfm_get_record (t->handle, &len);
1908 dfm_fwd_record (t->handle);
1910 /* Calculate occurs, length. */
1911 occurs_left = occurs = realize_value (&t->occurs, c);
1914 tmsg (SE, RPD_ERR, _("Invalid value %d for OCCURS."), occurs);
1917 starts_beg = realize_value (&t->starts_beg, c);
1918 if (starts_beg <= 0)
1920 tmsg (SE, RPD_ERR, _("Beginning column for STARTS (%d) must be "
1925 starts_end = realize_value (&t->starts_end, c);
1926 if (starts_end < starts_beg)
1928 tmsg (SE, RPD_ERR, _("Ending column for STARTS (%d) is less than "
1929 "beginning column (%d)."),
1930 starts_end, starts_beg);
1931 skip_first_record = 1;
1933 length = realize_value (&t->length, c);
1936 tmsg (SE, RPD_ERR, _("Invalid value %d for LENGTH."), length);
1938 occurs = occurs_left = 1;
1940 cont_beg = realize_value (&t->cont_beg, c);
1943 tmsg (SE, RPD_ERR, _("Beginning column for CONTINUED (%d) must be "
1948 cont_end = realize_value (&t->cont_end, c);
1949 if (cont_end < cont_beg)
1951 tmsg (SE, RPD_ERR, _("Ending column for CONTINUED (%d) is less than "
1952 "beginning column (%d)."),
1953 cont_end, cont_beg);
1957 /* Parse the first record. */
1958 if (!skip_first_record)
1960 struct rpd_parse_info info;
1964 info.beg = starts_beg;
1965 info.end = starts_end;
1969 info.max_occurs = occurs_left;
1970 code = rpd_parse_record (&info);
1973 occurs_left -= code;
1975 else if (cont_beg == 0)
1978 /* Make sure, if some occurrences are left, that we have
1979 continuation records. */
1980 if (occurs_left > 0 && cont_beg == 0)
1983 _("Number of repetitions specified on OCCURS (%d) "
1984 "exceed number of repetitions available in "
1985 "space on STARTS (%d), and CONTINUED not specified."),
1986 occurs, (starts_end - starts_beg + 1) / length);
1990 /* Go on to additional records. */
1991 while (occurs_left != 0)
1993 struct rpd_parse_info info;
1995 assert (occurs_left >= 0);
1997 /* Read in another record. */
1998 line = dfm_get_record (t->handle, &len);
2002 _("Unexpected end of file with %d repetitions "
2003 "remaining out of %d."),
2004 occurs_left, occurs);
2007 dfm_fwd_record (t->handle);
2009 /* Parse this record. */
2013 info.beg = cont_beg;
2014 info.end = cont_end;
2018 info.max_occurs = occurs_left;
2019 code = rpd_parse_record (&info);;
2022 occurs_left -= code;
2025 dfm_pop (t->handle);
2027 /* FIXME: This is a kluge until we've implemented multiplexing of
2032 /* Frees a REPEATING DATA transformation. */
2034 repeating_data_trns_free (struct trns_header *rpd_)
2036 struct repeating_data_trns *rpd = (struct repeating_data_trns *) rpd_;
2038 destroy_dls_var_spec (rpd->first);
2039 fh_close_handle (rpd->handle);
2040 free (rpd->id_value);
2043 /* Lets repeating_data_trns_proc() know how to write the cases
2044 that it composes. Not elegant. */
2046 repeating_data_set_write_case (struct trns_header *trns,
2047 write_case_func *write_case,
2048 write_case_data wc_data)
2050 struct repeating_data_trns *t = (struct repeating_data_trns *) trns;
2052 assert (trns->proc == repeating_data_trns_proc);
2053 t->write_case = write_case;
2054 t->wc_data = wc_data;