1 /* PSPP - computes sample statistics.
2 Copyright (C) 1997-9, 2000 Free Software Foundation, Inc.
3 Written by Ben Pfaff <blp@gnu.org>.
5 This program is free software; you can redistribute it and/or
6 modify it under the terms of the GNU General Public License as
7 published by the Free Software Foundation; either version 2 of the
8 License, or (at your option) any later version.
10 This program is distributed in the hope that it will be useful, but
11 WITHOUT ANY WARRANTY; without even the implied warranty of
12 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
13 General Public License for more details.
15 You should have received a copy of the GNU General Public License
16 along with this program; if not, write to the Free Software
17 Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA
21 #include "data-list.h"
30 #include "debug-print.h"
33 #include "file-handle.h"
43 /* Utility function. */
45 /* FIXME: Either REPEATING DATA must be the last transformation, or we
46 must multiplex the transformations that follow (i.e., perform them
47 for every case that we produce from a repetition instance).
48 Currently we do neither. We should do one or the other. */
50 /* Describes how to parse one variable. */
53 struct dls_var_spec *next; /* Next specification in list. */
55 /* Both free and fixed formats. */
56 struct fmt_spec input; /* Input format of this field. */
57 struct variable *v; /* Associated variable. Used only in
58 parsing. Not safe later. */
59 int fv; /* First value in case. */
61 /* Fixed format only. */
62 int rec; /* Record number (1-based). */
63 int fc, lc; /* Column numbers in record. */
65 /* Free format only. */
66 char name[9]; /* Name of variable. */
69 /* Constants for DATA LIST type. */
70 /* Must match table in cmd_data_list(). */
78 /* DATA LIST private data structure. */
83 struct dls_var_spec *first, *last; /* Variable parsing specifications. */
84 struct file_handle *handle; /* Input file, never NULL. */
86 int type; /* A DLS_* constant. */
87 struct variable *end; /* Variable specified on END subcommand. */
88 int eof; /* End of file encountered. */
89 int nrec; /* Number of records. */
90 size_t case_size; /* Case size in bytes. */
91 char *delims; /* Delimiters if any; not null-terminated. */
92 size_t delim_cnt; /* Number of delimiter, or 0 for spaces. */
95 static int parse_fixed (struct data_list_pgm *);
96 static int parse_free (struct dls_var_spec **, struct dls_var_spec **);
97 static void dump_fixed_table (const struct dls_var_spec *specs,
98 const struct file_handle *handle, int nrec);
99 static void dump_free_table (const struct data_list_pgm *);
100 static void destroy_dls_var_spec (struct dls_var_spec *);
101 static trns_free_func data_list_trns_free;
102 static trns_proc_func data_list_trns_proc;
104 /* Message title for REPEATING DATA. */
105 #define RPD_ERR "REPEATING DATA: "
110 /* DATA LIST program under construction. */
111 struct data_list_pgm *dls;
113 /* 0=print no table, 1=print table. (TABLE subcommand.) */
116 if (!case_source_is_complex (vfm_source))
117 discard_variables ();
119 dls = xmalloc (sizeof *dls);
120 dls->handle = default_handle;
127 dls->first = dls->last = NULL;
131 if (lex_match_id ("FILE"))
134 dls->handle = fh_parse_file_handle ();
137 if (case_source_is_class (vfm_source, &file_type_source_class)
138 && dls->handle != default_handle)
140 msg (SE, _("DATA LIST may not use a different file from "
141 "that specified on its surrounding FILE TYPE."));
145 else if (lex_match_id ("RECORDS"))
149 if (!lex_force_int ())
151 dls->nrec = lex_integer ();
155 else if (lex_match_id ("END"))
159 msg (SE, _("The END subcommand may only be specified once."));
164 if (!lex_force_id ())
166 dls->end = dict_lookup_var (default_dict, tokid);
168 dls->end = dict_create_var_assert (default_dict, tokid, 0);
171 else if (token == T_ID)
173 if (lex_match_id ("NOTABLE"))
175 else if (lex_match_id ("TABLE"))
180 if (lex_match_id ("FIXED"))
182 else if (lex_match_id ("FREE"))
184 else if (lex_match_id ("LIST"))
194 msg (SE, _("Only one of FIXED, FREE, or LIST may "
200 if ((dls->type == DLS_FREE || dls->type == DLS_LIST)
203 while (!lex_match (')'))
207 if (lex_match_id ("TAB"))
209 else if (token == T_STRING && tokstr.length == 1)
210 delim = tokstr.string[0];
217 dls->delims = xrealloc (dls->delims, dls->delim_cnt + 1);
218 dls->delims[dls->delim_cnt++] = delim;
232 dls->case_size = dict_get_case_size (default_dict);
233 default_handle = dls->handle;
236 dls->type = DLS_FIXED;
240 if (dls->type == DLS_FREE)
246 if (dls->type == DLS_FIXED)
248 if (!parse_fixed (dls))
251 dump_fixed_table (dls->first, dls->handle, dls->nrec);
255 if (!parse_free (&dls->first, &dls->last))
258 dump_free_table (dls);
261 if (!dfm_open_for_reading (dls->handle))
264 if (vfm_source != NULL)
266 struct data_list_pgm *new_pgm;
268 dls->h.proc = data_list_trns_proc;
269 dls->h.free = data_list_trns_free;
271 new_pgm = xmalloc (sizeof *new_pgm);
272 memcpy (new_pgm, &dls, sizeof *new_pgm);
273 add_transformation (&new_pgm->h);
276 vfm_source = create_case_source (&data_list_source_class,
282 destroy_dls_var_spec (dls->first);
287 /* Adds SPEC to the linked list with head at FIRST and tail at
290 append_var_spec (struct dls_var_spec **first, struct dls_var_spec **last,
291 struct dls_var_spec *spec)
298 (*last)->next = spec;
302 /* Fixed-format parsing. */
304 /* Used for chaining together fortran-like format specifiers. */
307 struct fmt_list *next;
310 struct fmt_list *down;
313 /* State of parsing DATA LIST. */
314 struct fixed_parsing_state
316 char **name; /* Variable names. */
317 int name_cnt; /* Number of names. */
319 int recno; /* Index of current record. */
320 int sc; /* 1-based column number of starting column for
321 next field to output. */
324 static int fixed_parse_compatible (struct fixed_parsing_state *,
325 struct dls_var_spec **,
326 struct dls_var_spec **);
327 static int fixed_parse_fortran (struct fixed_parsing_state *,
328 struct dls_var_spec **,
329 struct dls_var_spec **);
331 /* Parses all the variable specifications for DATA LIST FIXED,
332 storing them into DLS. Returns nonzero if successful. */
334 parse_fixed (struct data_list_pgm *dls)
336 struct fixed_parsing_state fx;
344 while (lex_match ('/'))
347 if (lex_integer_p ())
349 if (lex_integer () < fx.recno)
351 msg (SE, _("The record number specified, %ld, is "
352 "before the previous record, %d. Data "
353 "fields must be listed in order of "
354 "increasing record number."),
355 lex_integer (), fx.recno - 1);
359 fx.recno = lex_integer ();
365 if (!parse_DATA_LIST_vars (&fx.name, &fx.name_cnt, PV_NONE))
370 if (!fixed_parse_compatible (&fx, &dls->first, &dls->last))
373 else if (token == '(')
375 if (!fixed_parse_fortran (&fx, &dls->first, &dls->last))
380 msg (SE, _("SPSS-like or FORTRAN-like format "
381 "specification expected after variable names."));
385 for (i = 0; i < fx.name_cnt; i++)
389 if (dls->first == NULL)
391 msg (SE, _("At least one variable must be specified."));
394 if (dls->nrec && dls->last->rec > dls->nrec)
396 msg (SE, _("Variables are specified on records that "
397 "should not exist according to RECORDS subcommand."));
401 dls->nrec = dls->last->rec;
404 lex_error (_("expecting end of command"));
410 for (i = 0; i < fx.name_cnt; i++)
416 /* Parses a variable specification in the form 1-10 (A) based on
417 FX and adds specifications to the linked list with head at
418 FIRST and tail at LAST. */
420 fixed_parse_compatible (struct fixed_parsing_state *fx,
421 struct dls_var_spec **first, struct dls_var_spec **last)
423 struct fmt_spec input;
429 if (!lex_force_int ())
434 msg (SE, _("Column positions for fields must be positive."));
440 lex_negative_to_dash ();
443 if (!lex_force_int ())
448 msg (SE, _("Column positions for fields must be positive."));
453 msg (SE, _("The ending column for a field must be "
454 "greater than the starting column."));
463 /* Divide columns evenly. */
464 input.w = (lc - fc + 1) / fx->name_cnt;
465 if ((lc - fc + 1) % fx->name_cnt)
467 msg (SE, _("The %d columns %d-%d "
468 "can't be evenly divided into %d fields."),
469 lc - fc + 1, fc, lc, fx->name_cnt);
473 /* Format specifier. */
476 struct fmt_desc *fdp;
482 input.type = parse_format_specifier_name (&cp, 0);
483 if (input.type == -1)
487 msg (SE, _("A format specifier on this line "
488 "has extra characters on the end."));
498 if (lex_integer_p ())
500 if (lex_integer () < 1)
502 msg (SE, _("The value for number of decimal places "
503 "must be at least 1."));
507 input.d = lex_integer ();
513 fdp = &formats[input.type];
514 if (fdp->n_args < 2 && input.d)
516 msg (SE, _("Input format %s doesn't accept decimal places."),
524 if (!lex_force_match (')'))
532 if (!check_input_specifier (&input))
535 /* Start column for next specification. */
538 /* Width of variables to create. */
539 if (input.type == FMT_A || input.type == FMT_AHEX)
544 /* Create variables and var specs. */
545 for (i = 0; i < fx->name_cnt; i++)
547 struct dls_var_spec *spec;
550 v = dict_create_var (default_dict, fx->name[i], width);
553 convert_fmt_ItoO (&input, &v->print);
555 if (!case_source_is_complex (vfm_source))
560 v = dict_lookup_var_assert (default_dict, fx->name[i]);
561 if (vfm_source == NULL)
563 msg (SE, _("%s is a duplicate variable name."), fx->name[i]);
566 if ((width != 0) != (v->width != 0))
568 msg (SE, _("There is already a variable %s of a "
573 if (width != 0 && width != v->width)
575 msg (SE, _("There is already a string variable %s of a "
576 "different width."), fx->name[i]);
581 spec = xmalloc (sizeof *spec);
585 spec->rec = fx->recno;
586 spec->fc = fc + input.w * i;
587 spec->lc = spec->fc + input.w - 1;
588 append_var_spec (first, last, spec);
593 /* Destroy format list F and, if RECURSE is nonzero, all its
596 destroy_fmt_list (struct fmt_list *f, int recurse)
598 struct fmt_list *next;
603 if (recurse && f->f.type == FMT_DESCEND)
604 destroy_fmt_list (f->down, 1);
609 /* Takes a hierarchically structured fmt_list F as constructed by
610 fixed_parse_fortran(), and flattens it, adding the variable
611 specifications to the linked list with head FIRST and tail
612 LAST. NAME_IDX is used to take values from the list of names
613 in FX; it should initially point to a value of 0. */
615 dump_fmt_list (struct fixed_parsing_state *fx, struct fmt_list *f,
616 struct dls_var_spec **first, struct dls_var_spec **last,
621 for (; f; f = f->next)
622 if (f->f.type == FMT_X)
624 else if (f->f.type == FMT_T)
626 else if (f->f.type == FMT_NEWREC)
628 fx->recno += f->count;
632 for (i = 0; i < f->count; i++)
633 if (f->f.type == FMT_DESCEND)
635 if (!dump_fmt_list (fx, f->down, first, last, name_idx))
640 struct dls_var_spec *spec;
644 if (formats[f->f.type].cat & FCAT_STRING)
648 if (*name_idx >= fx->name_cnt)
650 msg (SE, _("The number of format "
651 "specifications exceeds the given number of "
656 v = dict_create_var (default_dict, fx->name[(*name_idx)++], width);
659 msg (SE, _("%s is a duplicate variable name."), fx->name[i]);
663 if (!case_source_is_complex (vfm_source))
666 spec = xmalloc (sizeof *spec);
670 spec->rec = fx->recno;
672 spec->lc = fx->sc + f->f.w - 1;
673 append_var_spec (first, last, spec);
675 convert_fmt_ItoO (&spec->input, &v->print);
683 /* Recursively parses a FORTRAN-like format specification into
684 the linked list with head FIRST and tail TAIL. LEVEL is the
685 level of recursion, starting from 0. Returns the parsed
686 specification if successful, or a null pointer on failure. */
687 static struct fmt_list *
688 fixed_parse_fortran_internal (struct fixed_parsing_state *fx,
689 struct dls_var_spec **first,
690 struct dls_var_spec **last)
692 struct fmt_list *head = NULL;
693 struct fmt_list *tail = NULL;
695 lex_force_match ('(');
699 struct fmt_list *new = xmalloc (sizeof *new);
702 /* Append new to list. */
710 if (lex_integer_p ())
712 new->count = lex_integer ();
718 /* Parse format specifier. */
721 new->f.type = FMT_DESCEND;
722 new->down = fixed_parse_fortran_internal (fx, first, last);
723 if (new->down == NULL)
726 else if (lex_match ('/'))
727 new->f.type = FMT_NEWREC;
728 else if (!parse_format_specifier (&new->f, 1)
729 || !check_input_specifier (&new->f))
734 lex_force_match (')');
739 destroy_fmt_list (head, 0);
744 /* Parses a FORTRAN-like format specification into the linked
745 list with head FIRST and tail LAST. Returns nonzero if
748 fixed_parse_fortran (struct fixed_parsing_state *fx,
749 struct dls_var_spec **first, struct dls_var_spec **last)
751 struct fmt_list *list;
754 list = fixed_parse_fortran_internal (fx, first, last);
759 dump_fmt_list (fx, list, first, last, &name_idx);
760 destroy_fmt_list (list, 1);
761 if (name_idx < fx->name_cnt)
763 msg (SE, _("There aren't enough format specifications "
764 "to match the number of variable names given."));
771 /* Displays a table giving information on fixed-format variable
772 parsing on DATA LIST. */
773 /* FIXME: The `Columns' column should be divided into three columns,
774 one for the starting column, one for the dash, one for the ending
775 column; then right-justify the starting column and left-justify the
778 dump_fixed_table (const struct dls_var_spec *specs,
779 const struct file_handle *handle, int nrec)
781 const struct dls_var_spec *spec;
784 const char *filename;
787 for (i = 0, spec = specs; spec; spec = spec->next)
789 t = tab_create (4, i + 1, 0);
790 tab_columns (t, TAB_COL_DOWN, 1);
791 tab_headers (t, 0, 0, 1, 0);
792 tab_text (t, 0, 0, TAB_CENTER | TAT_TITLE, _("Variable"));
793 tab_text (t, 1, 0, TAB_CENTER | TAT_TITLE, _("Record"));
794 tab_text (t, 2, 0, TAB_CENTER | TAT_TITLE, _("Columns"));
795 tab_text (t, 3, 0, TAB_CENTER | TAT_TITLE, _("Format"));
796 tab_box (t, TAL_1, TAL_1, TAL_0, TAL_1, 0, 0, 3, i);
797 tab_hline (t, TAL_2, 0, 3, 1);
798 tab_dim (t, tab_natural_dimensions);
800 for (i = 1, spec = specs; spec; spec = spec->next, i++)
802 tab_text (t, 0, i, TAB_LEFT, spec->v->name);
803 tab_text (t, 1, i, TAT_PRINTF, "%d", spec->rec);
804 tab_text (t, 2, i, TAT_PRINTF, "%3d-%3d",
806 tab_text (t, 3, i, TAB_LEFT | TAT_FIX,
807 fmt_to_string (&spec->input));
810 filename = handle_get_filename (handle);
811 if (filename == NULL)
813 buf = local_alloc (strlen (filename) + INT_DIGITS + 80);
814 sprintf (buf, (handle != inline_file
815 ? ngettext ("Reading %d record from file %s.",
816 "Reading %d records from file %s.", nrec)
817 : ngettext ("Reading %d record from the command file.",
818 "Reading %d records from the command file.",
822 tab_title (t, 0, buf);
827 /* Free-format parsing. */
829 /* Parses variable specifications for DATA LIST FREE and adds
830 them to the linked list with head FIRST and tail LAST.
831 Returns nonzero only if successful. */
833 parse_free (struct dls_var_spec **first, struct dls_var_spec **last)
838 struct fmt_spec input, output;
844 if (!parse_DATA_LIST_vars (&name, &name_cnt, PV_NONE))
848 if (!parse_format_specifier (&input, 0)
849 || !check_input_specifier (&input)
850 || !lex_force_match (')'))
852 for (i = 0; i < name_cnt; i++)
857 convert_fmt_ItoO (&input, &output);
865 output = get_format();
868 if (input.type == FMT_A || input.type == FMT_AHEX)
872 for (i = 0; i < name_cnt; i++)
874 struct dls_var_spec *spec;
877 v = dict_create_var (default_dict, name[i], width);
880 msg (SE, _("%s is a duplicate variable name."), name[i]);
883 v->print = v->write = output;
885 if (!case_source_is_complex (vfm_source))
888 spec = xmalloc (sizeof *spec);
892 strcpy (spec->name, name[i]);
893 append_var_spec (first, last, spec);
895 for (i = 0; i < name_cnt; i++)
901 lex_error (_("expecting end of command"));
905 /* Displays a table giving information on free-format variable parsing
908 dump_free_table (const struct data_list_pgm *dls)
914 struct dls_var_spec *spec;
915 for (i = 0, spec = dls->first; spec; spec = spec->next)
919 t = tab_create (2, i + 1, 0);
920 tab_columns (t, TAB_COL_DOWN, 1);
921 tab_headers (t, 0, 0, 1, 0);
922 tab_text (t, 0, 0, TAB_CENTER | TAT_TITLE, _("Variable"));
923 tab_text (t, 1, 0, TAB_CENTER | TAT_TITLE, _("Format"));
924 tab_box (t, TAL_1, TAL_1, TAL_0, TAL_1, 0, 0, 1, i);
925 tab_hline (t, TAL_2, 0, 1, 1);
926 tab_dim (t, tab_natural_dimensions);
929 struct dls_var_spec *spec;
931 for (i = 1, spec = dls->first; spec; spec = spec->next, i++)
933 tab_text (t, 0, i, TAB_LEFT, spec->v->name);
934 tab_text (t, 1, i, TAB_LEFT | TAT_FIX, fmt_to_string (&spec->input));
939 const char *filename;
941 filename = handle_get_filename (dls->handle);
942 if (filename == NULL)
945 (dls->handle != inline_file
946 ? _("Reading free-form data from file %s.")
947 : _("Reading free-form data from the command file.")),
954 /* Input procedure. */
956 /* Extracts a field from the current position in the current
957 record. Fields can be unquoted or quoted with single- or
958 double-quote characters. *FIELD is set to the field content.
959 After parsing the field, sets the current position in the
960 record to just past the field and any trailing delimiter.
961 END_BLANK is used internally; it should be initialized by the
962 caller to 0 and left alone afterward. Returns 0 on failure or
963 a 1-based column number indicating the beginning of the field
966 cut_field (const struct data_list_pgm *dls, struct len_string *field,
969 struct len_string line;
973 if (dfm_eof (dls->handle))
975 if (dls->delim_cnt == 0)
976 dfm_expand_tabs (dls->handle);
977 dfm_get_record (dls->handle, &line);
979 cp = ls_c_str (&line);
980 if (dls->delim_cnt == 0)
982 /* Skip leading whitespace. */
983 while (cp < ls_end (&line) && isspace ((unsigned char) *cp))
985 if (cp >= ls_end (&line))
988 /* Handle actual data, whether quoted or unquoted. */
989 if (*cp == '\'' || *cp == '"')
993 field->string = ++cp;
994 while (cp < ls_end (&line) && *cp != quote)
996 field->length = cp - field->string;
997 if (cp < ls_end (&line))
1000 msg (SW, _("Quoted string missing terminating `%c'."), quote);
1005 while (cp < ls_end (&line)
1006 && !isspace ((unsigned char) *cp) && *cp != ',')
1008 field->length = cp - field->string;
1011 /* Skip trailing whitespace and a single comma if present. */
1012 while (cp < ls_end (&line) && isspace ((unsigned char) *cp))
1014 if (cp < ls_end (&line) && *cp == ',')
1019 if (cp >= ls_end (&line))
1021 int column = dfm_column_start (dls->handle);
1022 /* A blank line or a line that ends in \t has a
1023 trailing blank field. */
1024 if (column == 1 || (column > 1 && cp[-1] == '\t'))
1026 if (*end_blank == 0)
1029 field->string = ls_end (&line);
1031 dfm_forward_record (dls->handle);
1046 while (cp < ls_end (&line)
1047 && memchr (dls->delims, *cp, dls->delim_cnt) == NULL)
1049 field->length = cp - field->string;
1050 if (cp < ls_end (&line))
1055 dfm_forward_columns (dls->handle, field->string - line.string);
1056 column_start = dfm_column_start (dls->handle);
1058 dfm_forward_columns (dls->handle, cp - field->string);
1060 return column_start;
1063 typedef int data_list_read_func (const struct data_list_pgm *, struct ccase *);
1064 static data_list_read_func read_from_data_list_fixed;
1065 static data_list_read_func read_from_data_list_free;
1066 static data_list_read_func read_from_data_list_list;
1068 /* Returns the proper function to read the kind of DATA LIST
1069 data specified by DLS. */
1070 static data_list_read_func *
1071 get_data_list_read_func (const struct data_list_pgm *dls)
1076 return read_from_data_list_fixed;
1079 return read_from_data_list_free;
1082 return read_from_data_list_list;
1090 /* Reads a case from the data file into C, parsing it according
1091 to fixed-format syntax rules in DLS. Returns -1 on success,
1092 -2 at end of file. */
1094 read_from_data_list_fixed (const struct data_list_pgm *dls,
1097 struct dls_var_spec *var_spec = dls->first;
1100 if (dfm_eof (dls->handle))
1102 for (i = 1; i <= dls->nrec; i++)
1104 struct len_string line;
1106 if (dfm_eof (dls->handle))
1108 /* Note that this can't occur on the first record. */
1109 msg (SW, _("Partial case of %d of %d records discarded."),
1113 dfm_expand_tabs (dls->handle);
1114 dfm_get_record (dls->handle, &line);
1116 for (; var_spec && i == var_spec->rec; var_spec = var_spec->next)
1120 data_in_finite_line (&di, ls_c_str (&line), ls_length (&line),
1121 var_spec->fc, var_spec->lc);
1122 di.v = &c->data[var_spec->fv];
1124 di.f1 = var_spec->fc;
1125 di.format = var_spec->input;
1130 dfm_forward_record (dls->handle);
1136 /* Reads a case from the data file into C, parsing it according
1137 to free-format syntax rules in DLS. Returns -1 on success,
1138 -2 at end of file. */
1140 read_from_data_list_free (const struct data_list_pgm *dls,
1143 struct dls_var_spec *var_spec;
1146 for (var_spec = dls->first; var_spec; var_spec = var_spec->next)
1148 struct len_string field;
1151 /* Cut out a field and read in a new record if necessary. */
1154 column = cut_field (dls, &field, &end_blank);
1158 if (!dfm_eof (dls->handle))
1159 dfm_forward_record (dls->handle);
1160 if (dfm_eof (dls->handle))
1162 if (var_spec != dls->first)
1163 msg (SW, _("Partial case discarded. The first variable "
1164 "missing was %s."), var_spec->name);
1172 di.s = ls_c_str (&field);
1173 di.e = ls_end (&field);
1174 di.v = &c->data[var_spec->fv];
1177 di.format = var_spec->input;
1184 /* Reads a case from the data file and parses it according to
1185 list-format syntax rules. Returns -1 on success, -2 at end of
1188 read_from_data_list_list (const struct data_list_pgm *dls,
1191 struct dls_var_spec *var_spec;
1194 if (dfm_eof (dls->handle))
1197 for (var_spec = dls->first; var_spec; var_spec = var_spec->next)
1199 struct len_string field;
1202 /* Cut out a field and check for end-of-line. */
1203 column = cut_field (dls, &field, &end_blank);
1206 if (get_undefined ())
1207 msg (SW, _("Missing value(s) for all variables from %s onward. "
1208 "These will be filled with the system-missing value "
1209 "or blanks, as appropriate."),
1211 for (; var_spec; var_spec = var_spec->next)
1213 int width = get_format_var_width (&var_spec->input);
1215 c->data[var_spec->fv].f = SYSMIS;
1217 memset (c->data[var_spec->fv].s, ' ', width);
1225 di.s = ls_c_str (&field);
1226 di.e = ls_end (&field);
1227 di.v = &c->data[var_spec->fv];
1230 di.format = var_spec->input;
1235 dfm_forward_record (dls->handle);
1239 /* Destroys SPEC. */
1241 destroy_dls_var_spec (struct dls_var_spec *spec)
1243 struct dls_var_spec *next;
1245 while (spec != NULL)
1253 /* Destroys DATA LIST transformation PGM. */
1255 data_list_trns_free (struct trns_header *pgm)
1257 struct data_list_pgm *dls = (struct data_list_pgm *) pgm;
1258 destroy_dls_var_spec (dls->first);
1259 fh_close_handle (dls->handle);
1263 /* Handle DATA LIST transformation T, parsing data into C. */
1265 data_list_trns_proc (struct trns_header *t, struct ccase *c,
1266 int case_num UNUSED)
1268 struct data_list_pgm *dls = (struct data_list_pgm *) t;
1269 data_list_read_func *read_func;
1272 dfm_push (dls->handle);
1274 read_func = get_data_list_read_func (dls);
1275 retval = read_func (dls, c);
1277 /* Handle end of file. */
1280 /* If we already encountered end of file then this is an
1284 msg (SE, _("Attempt to read past end of file."));
1286 dfm_pop (dls->handle);
1290 /* Otherwise simply note it. */
1296 /* If there was an END subcommand handle it. */
1297 if (dls->end != NULL)
1301 c->data[dls->end->fv].f = 1.0;
1305 c->data[dls->end->fv].f = 0.0;
1308 dfm_pop (dls->handle);
1313 /* Reads all the records from the data file and passes them to
1316 data_list_source_read (struct case_source *source,
1318 write_case_func *write_case, write_case_data wc_data)
1320 struct data_list_pgm *dls = source->aux;
1321 data_list_read_func *read_func = get_data_list_read_func (dls);
1323 dfm_push (dls->handle);
1324 while (read_func (dls, c) != -2)
1325 if (!write_case (wc_data))
1327 dfm_pop (dls->handle);
1329 fh_close_handle (dls->handle);
1332 /* Destroys the source's internal data. */
1334 data_list_source_destroy (struct case_source *source)
1336 data_list_trns_free (source->aux);
1339 const struct case_source_class data_list_source_class =
1343 data_list_source_read,
1344 data_list_source_destroy,
1347 /* REPEATING DATA. */
1349 /* Represents a number or a variable. */
1350 struct rpd_num_or_var
1352 int num; /* Value, or 0. */
1353 struct variable *var; /* Variable, if number==0. */
1356 /* REPEATING DATA private data structure. */
1357 struct repeating_data_trns
1359 struct trns_header h;
1360 struct dls_var_spec *first, *last; /* Variable parsing specifications. */
1361 struct file_handle *handle; /* Input file, never NULL. */
1363 struct rpd_num_or_var starts_beg; /* STARTS=, before the dash. */
1364 struct rpd_num_or_var starts_end; /* STARTS=, after the dash. */
1365 struct rpd_num_or_var occurs; /* OCCURS= subcommand. */
1366 struct rpd_num_or_var length; /* LENGTH= subcommand. */
1367 struct rpd_num_or_var cont_beg; /* CONTINUED=, before the dash. */
1368 struct rpd_num_or_var cont_end; /* CONTINUED=, after the dash. */
1370 /* ID subcommand. */
1371 int id_beg, id_end; /* Beginning & end columns. */
1372 struct variable *id_var; /* DATA LIST variable. */
1373 struct fmt_spec id_spec; /* Input format spec. */
1374 union value *id_value; /* ID value. */
1376 write_case_func *write_case;
1377 write_case_data wc_data;
1380 static trns_free_func repeating_data_trns_free;
1381 static int parse_num_or_var (struct rpd_num_or_var *, const char *);
1382 static int parse_repeating_data (struct dls_var_spec **,
1383 struct dls_var_spec **);
1384 static void find_variable_input_spec (struct variable *v,
1385 struct fmt_spec *spec);
1387 /* Parses the REPEATING DATA command. */
1389 cmd_repeating_data (void)
1391 struct repeating_data_trns *rpd;
1393 /* 0=print no table, 1=print table. (TABLE subcommand.) */
1396 /* Bits are set when a particular subcommand has been seen. */
1399 assert (case_source_is_complex (vfm_source));
1401 rpd = xmalloc (sizeof *rpd);
1402 rpd->handle = default_handle;
1403 rpd->first = rpd->last = NULL;
1404 rpd->starts_beg.num = 0;
1405 rpd->starts_beg.var = NULL;
1406 rpd->starts_end = rpd->occurs = rpd->length = rpd->cont_beg
1407 = rpd->cont_end = rpd->starts_beg;
1408 rpd->id_beg = rpd->id_end = 0;
1410 rpd->id_value = NULL;
1416 if (lex_match_id ("FILE"))
1419 rpd->handle = fh_parse_file_handle ();
1422 if (rpd->handle != default_handle)
1424 msg (SE, _("REPEATING DATA must use the same file as its "
1425 "corresponding DATA LIST or FILE TYPE."));
1429 else if (lex_match_id ("STARTS"))
1434 msg (SE, _("%s subcommand given multiple times."),"STARTS");
1439 if (!parse_num_or_var (&rpd->starts_beg, "STARTS beginning column"))
1442 lex_negative_to_dash ();
1443 if (lex_match ('-'))
1445 if (!parse_num_or_var (&rpd->starts_end, "STARTS ending column"))
1448 /* Otherwise, rpd->starts_end is left uninitialized.
1449 This is okay. We will initialize it later from the
1450 record length of the file. We can't do this now
1451 because we can't be sure that the user has specified
1452 the file handle yet. */
1455 if (rpd->starts_beg.num != 0 && rpd->starts_end.num != 0
1456 && rpd->starts_beg.num > rpd->starts_end.num)
1458 msg (SE, _("STARTS beginning column (%d) exceeds "
1459 "STARTS ending column (%d)."),
1460 rpd->starts_beg.num, rpd->starts_end.num);
1464 else if (lex_match_id ("OCCURS"))
1469 msg (SE, _("%s subcommand given multiple times."),"OCCURS");
1474 if (!parse_num_or_var (&rpd->occurs, "OCCURS"))
1477 else if (lex_match_id ("LENGTH"))
1482 msg (SE, _("%s subcommand given multiple times."),"LENGTH");
1487 if (!parse_num_or_var (&rpd->length, "LENGTH"))
1490 else if (lex_match_id ("CONTINUED"))
1495 msg (SE, _("%s subcommand given multiple times."),"CONTINUED");
1500 if (!lex_match ('/'))
1502 if (!parse_num_or_var (&rpd->cont_beg, "CONTINUED beginning column"))
1505 lex_negative_to_dash ();
1507 && !parse_num_or_var (&rpd->cont_end,
1508 "CONTINUED ending column"))
1511 if (rpd->cont_beg.num != 0 && rpd->cont_end.num != 0
1512 && rpd->cont_beg.num > rpd->cont_end.num)
1514 msg (SE, _("CONTINUED beginning column (%d) exceeds "
1515 "CONTINUED ending column (%d)."),
1516 rpd->cont_beg.num, rpd->cont_end.num);
1521 rpd->cont_beg.num = 1;
1523 else if (lex_match_id ("ID"))
1528 msg (SE, _("%s subcommand given multiple times."),"ID");
1533 if (!lex_force_int ())
1535 if (lex_integer () < 1)
1537 msg (SE, _("ID beginning column (%ld) must be positive."),
1541 rpd->id_beg = lex_integer ();
1544 lex_negative_to_dash ();
1546 if (lex_match ('-'))
1548 if (!lex_force_int ())
1550 if (lex_integer () < 1)
1552 msg (SE, _("ID ending column (%ld) must be positive."),
1556 if (lex_integer () < rpd->id_end)
1558 msg (SE, _("ID ending column (%ld) cannot be less than "
1559 "ID beginning column (%d)."),
1560 lex_integer (), rpd->id_beg);
1564 rpd->id_end = lex_integer ();
1567 else rpd->id_end = rpd->id_beg;
1569 if (!lex_force_match ('='))
1571 rpd->id_var = parse_variable ();
1572 if (rpd->id_var == NULL)
1575 find_variable_input_spec (rpd->id_var, &rpd->id_spec);
1576 rpd->id_value = xmalloc (sizeof *rpd->id_value * rpd->id_var->nv);
1578 else if (lex_match_id ("TABLE"))
1580 else if (lex_match_id ("NOTABLE"))
1582 else if (lex_match_id ("DATA"))
1590 if (!lex_force_match ('/'))
1594 /* Comes here when DATA specification encountered. */
1595 if ((seen & (1 | 2)) != (1 | 2))
1597 if ((seen & 1) == 0)
1598 msg (SE, _("Missing required specification STARTS."));
1599 if ((seen & 2) == 0)
1600 msg (SE, _("Missing required specification OCCURS."));
1604 /* Enforce ID restriction. */
1605 if ((seen & 16) && !(seen & 8))
1607 msg (SE, _("ID specified without CONTINUED."));
1611 /* Calculate starts_end, cont_end if necessary. */
1612 if (rpd->starts_end.num == 0 && rpd->starts_end.var == NULL)
1613 rpd->starts_end.num = handle_get_record_width (rpd->handle);
1614 if (rpd->cont_end.num == 0 && rpd->starts_end.var == NULL)
1615 rpd->cont_end.num = handle_get_record_width (rpd->handle);
1617 /* Calculate length if possible. */
1618 if ((seen & 4) == 0)
1620 struct dls_var_spec *iter;
1622 for (iter = rpd->first; iter; iter = iter->next)
1624 if (iter->lc > rpd->length.num)
1625 rpd->length.num = iter->lc;
1627 assert (rpd->length.num != 0);
1631 if (!parse_repeating_data (&rpd->first, &rpd->last))
1635 dump_fixed_table (rpd->first, rpd->handle, rpd->last->rec);
1638 struct repeating_data_trns *new_trns;
1640 rpd->h.proc = repeating_data_trns_proc;
1641 rpd->h.free = repeating_data_trns_free;
1643 new_trns = xmalloc (sizeof *new_trns);
1644 memcpy (new_trns, &rpd, sizeof *new_trns);
1645 add_transformation ((struct trns_header *) new_trns);
1648 return lex_end_of_command ();
1651 destroy_dls_var_spec (rpd->first);
1652 free (rpd->id_value);
1656 /* Finds the input format specification for variable V and puts
1657 it in SPEC. Because of the way that DATA LIST is structured,
1658 this is nontrivial. */
1660 find_variable_input_spec (struct variable *v, struct fmt_spec *spec)
1664 for (i = 0; i < n_trns; i++)
1666 struct data_list_pgm *pgm = (struct data_list_pgm *) t_trns[i];
1668 if (pgm->h.proc == data_list_trns_proc)
1670 struct dls_var_spec *iter;
1672 for (iter = pgm->first; iter; iter = iter->next)
1675 *spec = iter->input;
1684 /* Parses a number or a variable name from the syntax file and puts
1685 the results in VALUE. Ensures that the number is at least 1; else
1686 emits an error based on MESSAGE. Returns nonzero only if
1689 parse_num_or_var (struct rpd_num_or_var *value, const char *message)
1694 value->var = parse_variable ();
1695 if (value->var == NULL)
1697 if (value->var->type == ALPHA)
1699 msg (SE, _("String variable not allowed here."));
1703 else if (lex_integer_p ())
1705 value->num = lex_integer ();
1709 msg (SE, _("%s (%d) must be at least 1."), message, value->num);
1715 msg (SE, _("Variable or integer expected for %s."), message);
1721 /* Parses data specifications for repeating data groups, adding
1722 them to the linked list with head FIRST and tail LAST.
1723 Returns nonzero only if successful. */
1725 parse_repeating_data (struct dls_var_spec **first, struct dls_var_spec **last)
1727 struct fixed_parsing_state fx;
1733 while (token != '.')
1735 if (!parse_DATA_LIST_vars (&fx.name, &fx.name_cnt, PV_NONE))
1740 if (!fixed_parse_compatible (&fx, first, last))
1743 else if (token == '(')
1745 if (!fixed_parse_fortran (&fx, first, last))
1750 msg (SE, _("SPSS-like or FORTRAN-like format "
1751 "specification expected after variable names."));
1755 for (i = 0; i < fx.name_cnt; i++)
1761 lex_error (_("expecting end of command"));
1768 for (i = 0; i < fx.name_cnt; i++)
1774 /* Obtains the real value for rpd_num_or_var N in case C and returns
1775 it. The valid range is nonnegative numbers, but numbers outside
1776 this range can be returned and should be handled by the caller as
1779 realize_value (struct rpd_num_or_var *n, struct ccase *c)
1784 assert (n->num == 0);
1787 double v = c->data[n->var->fv].f;
1789 if (v == SYSMIS || v <= INT_MIN || v >= INT_MAX)
1798 /* Parameter record passed to rpd_parse_record(). */
1799 struct rpd_parse_info
1801 struct repeating_data_trns *trns; /* REPEATING DATA transformation. */
1802 const char *line; /* Line being parsed. */
1803 size_t len; /* Line length. */
1804 int beg, end; /* First and last column of first occurrence. */
1805 int ofs; /* Column offset between repeated occurrences. */
1806 struct ccase *c; /* Case to fill in. */
1807 int verify_id; /* Zero to initialize ID, nonzero to verify it. */
1808 int max_occurs; /* Max number of occurrences to parse. */
1811 /* Parses one record of repeated data and outputs corresponding
1812 cases. Returns number of occurrences parsed up to the
1813 maximum specified in INFO. */
1815 rpd_parse_record (const struct rpd_parse_info *info)
1817 struct repeating_data_trns *t = info->trns;
1818 int cur = info->beg;
1821 /* Handle record ID values. */
1824 union value id_temp[MAX_ELEMS_PER_VALUE];
1826 /* Parse record ID into V. */
1830 data_in_finite_line (&di, info->line, info->len, t->id_beg, t->id_end);
1831 di.v = info->verify_id ? id_temp : t->id_value;
1834 di.format = t->id_spec;
1841 && compare_values (id_temp, t->id_value, t->id_var->width) != 0)
1843 char expected_str [MAX_FORMATTED_LEN + 1];
1844 char actual_str [MAX_FORMATTED_LEN + 1];
1846 data_out (expected_str, &t->id_var->print, t->id_value);
1847 expected_str[t->id_var->print.w] = '\0';
1849 data_out (actual_str, &t->id_var->print, id_temp);
1850 actual_str[t->id_var->print.w] = '\0';
1853 _("Encountered mismatched record ID \"%s\" expecting \"%s\"."),
1854 actual_str, expected_str);
1860 /* Iterate over the set of expected occurrences and record each of
1861 them as a separate case. FIXME: We need to execute any
1862 transformations that follow the current one. */
1866 for (occurrences = 0; occurrences < info->max_occurs; )
1868 if (cur + info->ofs > info->end + 1)
1873 struct dls_var_spec *var_spec = t->first;
1875 for (; var_spec; var_spec = var_spec->next)
1877 int fc = var_spec->fc - 1 + cur;
1878 int lc = var_spec->lc - 1 + cur;
1880 if (fc > info->len && !warned && var_spec->input.type != FMT_A)
1885 _("Variable %s starting in column %d extends "
1886 "beyond physical record length of %d."),
1887 var_spec->v->name, fc, info->len);
1893 data_in_finite_line (&di, info->line, info->len, fc, lc);
1894 di.v = &info->c->data[var_spec->fv];
1897 di.format = var_spec->input;
1907 if (!t->write_case (t->wc_data))
1915 /* Reads one set of repetitions of the elements in the REPEATING
1916 DATA structure. Returns -1 on success, -2 on end of file or
1919 repeating_data_trns_proc (struct trns_header *trns, struct ccase *c,
1920 int case_num UNUSED)
1922 struct repeating_data_trns *t = (struct repeating_data_trns *) trns;
1924 struct len_string line; /* Current record. */
1926 int starts_beg; /* Starting column. */
1927 int starts_end; /* Ending column. */
1928 int occurs; /* Number of repetitions. */
1929 int length; /* Length of each occurrence. */
1930 int cont_beg; /* Starting column for continuation lines. */
1931 int cont_end; /* Ending column for continuation lines. */
1933 int occurs_left; /* Number of occurrences remaining. */
1935 int code; /* Return value from rpd_parse_record(). */
1937 int skip_first_record = 0;
1939 dfm_push (t->handle);
1941 /* Read the current record. */
1942 dfm_reread_record (t->handle, 1);
1943 dfm_expand_tabs (t->handle);
1944 if (dfm_eof (t->handle))
1946 dfm_get_record (t->handle, &line);
1947 dfm_forward_record (t->handle);
1949 /* Calculate occurs, length. */
1950 occurs_left = occurs = realize_value (&t->occurs, c);
1953 tmsg (SE, RPD_ERR, _("Invalid value %d for OCCURS."), occurs);
1956 starts_beg = realize_value (&t->starts_beg, c);
1957 if (starts_beg <= 0)
1959 tmsg (SE, RPD_ERR, _("Beginning column for STARTS (%d) must be "
1964 starts_end = realize_value (&t->starts_end, c);
1965 if (starts_end < starts_beg)
1967 tmsg (SE, RPD_ERR, _("Ending column for STARTS (%d) is less than "
1968 "beginning column (%d)."),
1969 starts_end, starts_beg);
1970 skip_first_record = 1;
1972 length = realize_value (&t->length, c);
1975 tmsg (SE, RPD_ERR, _("Invalid value %d for LENGTH."), length);
1977 occurs = occurs_left = 1;
1979 cont_beg = realize_value (&t->cont_beg, c);
1982 tmsg (SE, RPD_ERR, _("Beginning column for CONTINUED (%d) must be "
1987 cont_end = realize_value (&t->cont_end, c);
1988 if (cont_end < cont_beg)
1990 tmsg (SE, RPD_ERR, _("Ending column for CONTINUED (%d) is less than "
1991 "beginning column (%d)."),
1992 cont_end, cont_beg);
1996 /* Parse the first record. */
1997 if (!skip_first_record)
1999 struct rpd_parse_info info;
2001 info.line = ls_c_str (&line);
2002 info.len = ls_length (&line);
2003 info.beg = starts_beg;
2004 info.end = starts_end;
2008 info.max_occurs = occurs_left;
2009 code = rpd_parse_record (&info);
2012 occurs_left -= code;
2014 else if (cont_beg == 0)
2017 /* Make sure, if some occurrences are left, that we have
2018 continuation records. */
2019 if (occurs_left > 0 && cont_beg == 0)
2022 _("Number of repetitions specified on OCCURS (%d) "
2023 "exceed number of repetitions available in "
2024 "space on STARTS (%d), and CONTINUED not specified."),
2025 occurs, (starts_end - starts_beg + 1) / length);
2029 /* Go on to additional records. */
2030 while (occurs_left != 0)
2032 struct rpd_parse_info info;
2034 assert (occurs_left >= 0);
2036 /* Read in another record. */
2037 if (dfm_eof (t->handle))
2040 _("Unexpected end of file with %d repetitions "
2041 "remaining out of %d."),
2042 occurs_left, occurs);
2045 dfm_expand_tabs (t->handle);
2046 dfm_get_record (t->handle, &line);
2047 dfm_forward_record (t->handle);
2049 /* Parse this record. */
2051 info.line = ls_c_str (&line);
2052 info.len = ls_length (&line);
2053 info.beg = cont_beg;
2054 info.end = cont_end;
2058 info.max_occurs = occurs_left;
2059 code = rpd_parse_record (&info);;
2062 occurs_left -= code;
2065 dfm_pop (t->handle);
2067 /* FIXME: This is a kluge until we've implemented multiplexing of
2072 /* Frees a REPEATING DATA transformation. */
2074 repeating_data_trns_free (struct trns_header *rpd_)
2076 struct repeating_data_trns *rpd = (struct repeating_data_trns *) rpd_;
2078 destroy_dls_var_spec (rpd->first);
2079 fh_close_handle (rpd->handle);
2080 free (rpd->id_value);
2083 /* Lets repeating_data_trns_proc() know how to write the cases
2084 that it composes. Not elegant. */
2086 repeating_data_set_write_case (struct trns_header *trns,
2087 write_case_func *write_case,
2088 write_case_data wc_data)
2090 struct repeating_data_trns *t = (struct repeating_data_trns *) trns;
2092 assert (trns->proc == repeating_data_trns_proc);
2093 t->write_case = write_case;
2094 t->wc_data = wc_data;