1 /* PSPP - computes sample statistics.
2 Copyright (C) 1997-9, 2000 Free Software Foundation, Inc.
3 Written by Ben Pfaff <blp@gnu.org>.
5 This program is free software; you can redistribute it and/or
6 modify it under the terms of the GNU General Public License as
7 published by the Free Software Foundation; either version 2 of the
8 License, or (at your option) any later version.
10 This program is distributed in the hope that it will be useful, but
11 WITHOUT ANY WARRANTY; without even the implied warranty of
12 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
13 General Public License for more details.
15 You should have received a copy of the GNU General Public License
16 along with this program; if not, write to the Free Software
17 Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
21 #include "data-list.h"
31 #include "debug-print.h"
33 #include "dictionary.h"
35 #include "file-handle.h"
46 #define _(msgid) gettext (msgid)
48 /* Utility function. */
50 /* FIXME: Either REPEATING DATA must be the last transformation, or we
51 must multiplex the transformations that follow (i.e., perform them
52 for every case that we produce from a repetition instance).
53 Currently we do neither. We should do one or the other. */
55 /* Describes how to parse one variable. */
58 struct dls_var_spec *next; /* Next specification in list. */
60 /* Both free and fixed formats. */
61 struct fmt_spec input; /* Input format of this field. */
62 struct variable *v; /* Associated variable. Used only in
63 parsing. Not safe later. */
64 int fv; /* First value in case. */
66 /* Fixed format only. */
67 int rec; /* Record number (1-based). */
68 int fc, lc; /* Column numbers in record. */
70 /* Free format only. */
71 char name[LONG_NAME_LEN + 1]; /* Name of variable. */
74 /* Constants for DATA LIST type. */
75 /* Must match table in cmd_data_list(). */
83 /* DATA LIST private data structure. */
86 struct dls_var_spec *first, *last; /* Variable parsing specifications. */
87 struct dfm_reader *reader; /* Data file reader. */
89 int type; /* A DLS_* constant. */
90 struct variable *end; /* Variable specified on END subcommand. */
91 int eof; /* End of file encountered. */
92 int rec_cnt; /* Number of records. */
93 size_t case_size; /* Case size in bytes. */
94 char *delims; /* Delimiters if any; not null-terminated. */
95 size_t delim_cnt; /* Number of delimiter, or 0 for spaces. */
98 static int parse_fixed (struct data_list_pgm *);
99 static int parse_free (struct dls_var_spec **, struct dls_var_spec **);
100 static void dump_fixed_table (const struct dls_var_spec *,
101 const struct file_handle *, int rec_cnt);
102 static void dump_free_table (const struct data_list_pgm *,
103 const struct file_handle *);
104 static void destroy_dls_var_spec (struct dls_var_spec *);
105 static trns_free_func data_list_trns_free;
106 static trns_proc_func data_list_trns_proc;
108 /* Message title for REPEATING DATA. */
109 #define RPD_ERR "REPEATING DATA: "
114 struct data_list_pgm *dls; /* DATA LIST program under construction. */
115 int table = -1; /* Print table if nonzero, -1=undecided. */
116 struct file_handle *fh = NULL; /* File handle of source, NULL=inline file. */
118 if (!case_source_is_complex (vfm_source))
119 discard_variables ();
121 dls = xmalloc (sizeof *dls);
129 dls->first = dls->last = NULL;
133 if (lex_match_id ("FILE"))
139 if (case_source_is_class (vfm_source, &file_type_source_class)
140 && fh != default_handle)
142 msg (SE, _("DATA LIST may not use a different file from "
143 "that specified on its surrounding FILE TYPE."));
147 else if (lex_match_id ("RECORDS"))
151 if (!lex_force_int ())
153 dls->rec_cnt = lex_integer ();
157 else if (lex_match_id ("END"))
161 msg (SE, _("The END subcommand may only be specified once."));
166 if (!lex_force_id ())
168 dls->end = dict_lookup_var (default_dict, tokid);
170 dls->end = dict_create_var_assert (default_dict, tokid, 0);
173 else if (token == T_ID)
175 if (lex_match_id ("NOTABLE"))
177 else if (lex_match_id ("TABLE"))
182 if (lex_match_id ("FIXED"))
184 else if (lex_match_id ("FREE"))
186 else if (lex_match_id ("LIST"))
196 msg (SE, _("Only one of FIXED, FREE, or LIST may "
202 if ((dls->type == DLS_FREE || dls->type == DLS_LIST)
205 while (!lex_match (')'))
209 if (lex_match_id ("TAB"))
211 else if (token == T_STRING && tokstr.length == 1)
213 delim = tokstr.string[0];
222 dls->delims = xrealloc (dls->delims, dls->delim_cnt + 1);
223 dls->delims[dls->delim_cnt++] = delim;
237 dls->case_size = dict_get_case_size (default_dict);
241 dls->type = DLS_FIXED;
245 if (dls->type == DLS_FREE)
251 if (dls->type == DLS_FIXED)
253 if (!parse_fixed (dls))
256 dump_fixed_table (dls->first, fh, dls->rec_cnt);
260 if (!parse_free (&dls->first, &dls->last))
263 dump_free_table (dls, fh);
266 dls->reader = dfm_open_reader (fh);
267 if (dls->reader == NULL)
270 if (vfm_source != NULL)
271 add_transformation (data_list_trns_proc, data_list_trns_free, dls);
273 vfm_source = create_case_source (&data_list_source_class, dls);
278 data_list_trns_free (dls);
282 /* Adds SPEC to the linked list with head at FIRST and tail at
285 append_var_spec (struct dls_var_spec **first, struct dls_var_spec **last,
286 struct dls_var_spec *spec)
293 (*last)->next = spec;
297 /* Fixed-format parsing. */
299 /* Used for chaining together fortran-like format specifiers. */
302 struct fmt_list *next;
305 struct fmt_list *down;
308 /* State of parsing DATA LIST. */
309 struct fixed_parsing_state
311 char **name; /* Variable names. */
312 size_t name_cnt; /* Number of names. */
314 int recno; /* Index of current record. */
315 int sc; /* 1-based column number of starting column for
316 next field to output. */
319 static int fixed_parse_compatible (struct fixed_parsing_state *,
320 struct dls_var_spec **,
321 struct dls_var_spec **);
322 static int fixed_parse_fortran (struct fixed_parsing_state *,
323 struct dls_var_spec **,
324 struct dls_var_spec **);
326 /* Parses all the variable specifications for DATA LIST FIXED,
327 storing them into DLS. Returns nonzero if successful. */
329 parse_fixed (struct data_list_pgm *dls)
331 struct fixed_parsing_state fx;
339 while (lex_match ('/'))
342 if (lex_is_integer ())
344 if (lex_integer () < fx.recno)
346 msg (SE, _("The record number specified, %ld, is "
347 "before the previous record, %d. Data "
348 "fields must be listed in order of "
349 "increasing record number."),
350 lex_integer (), fx.recno - 1);
354 fx.recno = lex_integer ();
360 if (!parse_DATA_LIST_vars (&fx.name, &fx.name_cnt, PV_NONE))
363 if (lex_is_number ())
365 if (!fixed_parse_compatible (&fx, &dls->first, &dls->last))
368 else if (token == '(')
370 if (!fixed_parse_fortran (&fx, &dls->first, &dls->last))
375 msg (SE, _("SPSS-like or FORTRAN-like format "
376 "specification expected after variable names."));
380 for (i = 0; i < fx.name_cnt; i++)
384 if (dls->first == NULL)
386 msg (SE, _("At least one variable must be specified."));
389 if (dls->rec_cnt && dls->last->rec > dls->rec_cnt)
391 msg (SE, _("Variables are specified on records that "
392 "should not exist according to RECORDS subcommand."));
395 else if (!dls->rec_cnt)
396 dls->rec_cnt = dls->last->rec;
397 return lex_end_of_command () == CMD_SUCCESS;
400 for (i = 0; i < fx.name_cnt; i++)
406 /* Parses a variable specification in the form 1-10 (A) based on
407 FX and adds specifications to the linked list with head at
408 FIRST and tail at LAST. */
410 fixed_parse_compatible (struct fixed_parsing_state *fx,
411 struct dls_var_spec **first, struct dls_var_spec **last)
413 struct fmt_spec input;
419 if (!lex_force_int ())
424 msg (SE, _("Column positions for fields must be positive."));
430 lex_negative_to_dash ();
433 if (!lex_force_int ())
438 msg (SE, _("Column positions for fields must be positive."));
443 msg (SE, _("The ending column for a field must be "
444 "greater than the starting column."));
453 /* Divide columns evenly. */
454 input.w = (lc - fc + 1) / fx->name_cnt;
455 if ((lc - fc + 1) % fx->name_cnt)
457 msg (SE, _("The %d columns %d-%d "
458 "can't be evenly divided into %d fields."),
459 lc - fc + 1, fc, lc, fx->name_cnt);
463 /* Format specifier. */
466 struct fmt_desc *fdp;
472 input.type = parse_format_specifier_name (&cp, 0);
473 if (input.type == -1)
477 msg (SE, _("A format specifier on this line "
478 "has extra characters on the end."));
488 if (lex_is_integer ())
490 if (lex_integer () < 1)
492 msg (SE, _("The value for number of decimal places "
493 "must be at least 1."));
497 input.d = lex_integer ();
503 fdp = &formats[input.type];
504 if (fdp->n_args < 2 && input.d)
506 msg (SE, _("Input format %s doesn't accept decimal places."),
514 if (!lex_force_match (')'))
522 if (!check_input_specifier (&input, 1))
525 /* Start column for next specification. */
528 /* Width of variables to create. */
529 if (input.type == FMT_A || input.type == FMT_AHEX)
534 /* Create variables and var specs. */
535 for (i = 0; i < fx->name_cnt; i++)
537 struct dls_var_spec *spec;
540 v = dict_create_var (default_dict, fx->name[i], width);
543 convert_fmt_ItoO (&input, &v->print);
545 if (!case_source_is_complex (vfm_source))
550 v = dict_lookup_var_assert (default_dict, fx->name[i]);
551 if (vfm_source == NULL)
553 msg (SE, _("%s is a duplicate variable name."), fx->name[i]);
556 if ((width != 0) != (v->width != 0))
558 msg (SE, _("There is already a variable %s of a "
563 if (width != 0 && width != v->width)
565 msg (SE, _("There is already a string variable %s of a "
566 "different width."), fx->name[i]);
571 spec = xmalloc (sizeof *spec);
575 spec->rec = fx->recno;
576 spec->fc = fc + input.w * i;
577 spec->lc = spec->fc + input.w - 1;
578 append_var_spec (first, last, spec);
583 /* Destroy format list F and, if RECURSE is nonzero, all its
586 destroy_fmt_list (struct fmt_list *f, int recurse)
588 struct fmt_list *next;
593 if (recurse && f->f.type == FMT_DESCEND)
594 destroy_fmt_list (f->down, 1);
599 /* Takes a hierarchically structured fmt_list F as constructed by
600 fixed_parse_fortran(), and flattens it, adding the variable
601 specifications to the linked list with head FIRST and tail
602 LAST. NAME_IDX is used to take values from the list of names
603 in FX; it should initially point to a value of 0. */
605 dump_fmt_list (struct fixed_parsing_state *fx, struct fmt_list *f,
606 struct dls_var_spec **first, struct dls_var_spec **last,
611 for (; f; f = f->next)
612 if (f->f.type == FMT_X)
614 else if (f->f.type == FMT_T)
616 else if (f->f.type == FMT_NEWREC)
618 fx->recno += f->count;
622 for (i = 0; i < f->count; i++)
623 if (f->f.type == FMT_DESCEND)
625 if (!dump_fmt_list (fx, f->down, first, last, name_idx))
630 struct dls_var_spec *spec;
634 if (formats[f->f.type].cat & FCAT_STRING)
638 if (*name_idx >= fx->name_cnt)
640 msg (SE, _("The number of format "
641 "specifications exceeds the given number of "
646 v = dict_create_var (default_dict, fx->name[(*name_idx)++], width);
649 msg (SE, _("%s is a duplicate variable name."), fx->name[i]);
653 if (!case_source_is_complex (vfm_source))
656 spec = xmalloc (sizeof *spec);
660 spec->rec = fx->recno;
662 spec->lc = fx->sc + f->f.w - 1;
663 append_var_spec (first, last, spec);
665 convert_fmt_ItoO (&spec->input, &v->print);
673 /* Recursively parses a FORTRAN-like format specification into
674 the linked list with head FIRST and tail TAIL. LEVEL is the
675 level of recursion, starting from 0. Returns the parsed
676 specification if successful, or a null pointer on failure. */
677 static struct fmt_list *
678 fixed_parse_fortran_internal (struct fixed_parsing_state *fx,
679 struct dls_var_spec **first,
680 struct dls_var_spec **last)
682 struct fmt_list *head = NULL;
683 struct fmt_list *tail = NULL;
685 lex_force_match ('(');
689 struct fmt_list *new = xmalloc (sizeof *new);
692 /* Append new to list. */
700 if (lex_is_integer ())
702 new->count = lex_integer ();
708 /* Parse format specifier. */
711 new->f.type = FMT_DESCEND;
712 new->down = fixed_parse_fortran_internal (fx, first, last);
713 if (new->down == NULL)
716 else if (lex_match ('/'))
717 new->f.type = FMT_NEWREC;
718 else if (!parse_format_specifier (&new->f, FMTP_ALLOW_XT)
719 || !check_input_specifier (&new->f, 1))
724 lex_force_match (')');
729 destroy_fmt_list (head, 0);
734 /* Parses a FORTRAN-like format specification into the linked
735 list with head FIRST and tail LAST. Returns nonzero if
738 fixed_parse_fortran (struct fixed_parsing_state *fx,
739 struct dls_var_spec **first, struct dls_var_spec **last)
741 struct fmt_list *list;
744 list = fixed_parse_fortran_internal (fx, first, last);
749 dump_fmt_list (fx, list, first, last, &name_idx);
750 destroy_fmt_list (list, 1);
751 if (name_idx < fx->name_cnt)
753 msg (SE, _("There aren't enough format specifications "
754 "to match the number of variable names given."));
761 /* Displays a table giving information on fixed-format variable
762 parsing on DATA LIST. */
763 /* FIXME: The `Columns' column should be divided into three columns,
764 one for the starting column, one for the dash, one for the ending
765 column; then right-justify the starting column and left-justify the
768 dump_fixed_table (const struct dls_var_spec *specs,
769 const struct file_handle *fh, int rec_cnt)
771 const struct dls_var_spec *spec;
775 for (i = 0, spec = specs; spec; spec = spec->next)
777 t = tab_create (4, i + 1, 0);
778 tab_columns (t, TAB_COL_DOWN, 1);
779 tab_headers (t, 0, 0, 1, 0);
780 tab_text (t, 0, 0, TAB_CENTER | TAT_TITLE, _("Variable"));
781 tab_text (t, 1, 0, TAB_CENTER | TAT_TITLE, _("Record"));
782 tab_text (t, 2, 0, TAB_CENTER | TAT_TITLE, _("Columns"));
783 tab_text (t, 3, 0, TAB_CENTER | TAT_TITLE, _("Format"));
784 tab_box (t, TAL_1, TAL_1, TAL_0, TAL_1, 0, 0, 3, i);
785 tab_hline (t, TAL_2, 0, 3, 1);
786 tab_dim (t, tab_natural_dimensions);
788 for (i = 1, spec = specs; spec; spec = spec->next, i++)
790 tab_text (t, 0, i, TAB_LEFT, spec->v->name);
791 tab_text (t, 1, i, TAT_PRINTF, "%d", spec->rec);
792 tab_text (t, 2, i, TAT_PRINTF, "%3d-%3d",
794 tab_text (t, 3, i, TAB_LEFT | TAT_FIX,
795 fmt_to_string (&spec->input));
799 tab_title (t, 1, ngettext ("Reading %d record from file %s.",
800 "Reading %d records from file %s.", rec_cnt),
801 rec_cnt, handle_get_filename (fh));
803 tab_title (t, 1, ngettext ("Reading %d record from the command file.",
804 "Reading %d records from the command file.",
810 /* Free-format parsing. */
812 /* Parses variable specifications for DATA LIST FREE and adds
813 them to the linked list with head FIRST and tail LAST.
814 Returns nonzero only if successful. */
816 parse_free (struct dls_var_spec **first, struct dls_var_spec **last)
821 struct fmt_spec input, output;
827 if (!parse_DATA_LIST_vars (&name, &name_cnt, PV_NONE))
832 if (!parse_format_specifier (&input, 0)
833 || !check_input_specifier (&input, 1)
834 || !lex_force_match (')'))
836 for (i = 0; i < name_cnt; i++)
841 convert_fmt_ItoO (&input, &output);
846 input = make_input_format (FMT_F, 8, 0);
847 output = *get_format ();
850 if (input.type == FMT_A || input.type == FMT_AHEX)
854 for (i = 0; i < name_cnt; i++)
856 struct dls_var_spec *spec;
859 v = dict_create_var (default_dict, name[i], width);
863 msg (SE, _("%s is a duplicate variable name."), name[i]);
866 v->print = v->write = output;
868 if (!case_source_is_complex (vfm_source))
871 spec = xmalloc (sizeof *spec);
875 str_copy_trunc (spec->name, sizeof spec->name, v->name);
876 append_var_spec (first, last, spec);
878 for (i = 0; i < name_cnt; i++)
883 return lex_end_of_command () == CMD_SUCCESS;
886 /* Displays a table giving information on free-format variable parsing
889 dump_free_table (const struct data_list_pgm *dls,
890 const struct file_handle *fh)
896 struct dls_var_spec *spec;
897 for (i = 0, spec = dls->first; spec; spec = spec->next)
901 t = tab_create (2, i + 1, 0);
902 tab_columns (t, TAB_COL_DOWN, 1);
903 tab_headers (t, 0, 0, 1, 0);
904 tab_text (t, 0, 0, TAB_CENTER | TAT_TITLE, _("Variable"));
905 tab_text (t, 1, 0, TAB_CENTER | TAT_TITLE, _("Format"));
906 tab_box (t, TAL_1, TAL_1, TAL_0, TAL_1, 0, 0, 1, i);
907 tab_hline (t, TAL_2, 0, 1, 1);
908 tab_dim (t, tab_natural_dimensions);
911 struct dls_var_spec *spec;
913 for (i = 1, spec = dls->first; spec; spec = spec->next, i++)
915 tab_text (t, 0, i, TAB_LEFT, spec->v->name);
916 tab_text (t, 1, i, TAB_LEFT | TAT_FIX, fmt_to_string (&spec->input));
921 tab_title (t, 1, _("Reading free-form data from file %s."),
922 handle_get_filename (fh));
924 tab_title (t, 1, _("Reading free-form data from the command file."));
929 /* Input procedure. */
931 /* Extracts a field from the current position in the current
932 record. Fields can be unquoted or quoted with single- or
933 double-quote characters. *FIELD is set to the field content.
934 After parsing the field, sets the current position in the
935 record to just past the field and any trailing delimiter.
936 END_BLANK is used internally; it should be initialized by the
937 caller to 0 and left alone afterward. Returns 0 on failure or
938 a 1-based column number indicating the beginning of the field
941 cut_field (const struct data_list_pgm *dls, struct fixed_string *field,
944 struct fixed_string line;
948 if (dfm_eof (dls->reader))
950 if (dls->delim_cnt == 0)
951 dfm_expand_tabs (dls->reader);
952 dfm_get_record (dls->reader, &line);
954 cp = ls_c_str (&line);
955 if (dls->delim_cnt == 0)
957 /* Skip leading whitespace. */
958 while (cp < ls_end (&line) && isspace ((unsigned char) *cp))
960 if (cp >= ls_end (&line))
963 /* Handle actual data, whether quoted or unquoted. */
964 if (*cp == '\'' || *cp == '"')
968 field->string = ++cp;
969 while (cp < ls_end (&line) && *cp != quote)
971 field->length = cp - field->string;
972 if (cp < ls_end (&line))
975 msg (SW, _("Quoted string missing terminating `%c'."), quote);
980 while (cp < ls_end (&line)
981 && !isspace ((unsigned char) *cp) && *cp != ',')
983 field->length = cp - field->string;
986 /* Skip trailing whitespace and a single comma if present. */
987 while (cp < ls_end (&line) && isspace ((unsigned char) *cp))
989 if (cp < ls_end (&line) && *cp == ',')
994 if (cp >= ls_end (&line))
996 int column = dfm_column_start (dls->reader);
997 /* A blank line or a line that ends in \t has a
998 trailing blank field. */
999 if (column == 1 || (column > 1 && cp[-1] == '\t'))
1001 if (*end_blank == 0)
1004 field->string = ls_end (&line);
1006 dfm_forward_record (dls->reader);
1021 while (cp < ls_end (&line)
1022 && memchr (dls->delims, *cp, dls->delim_cnt) == NULL)
1024 field->length = cp - field->string;
1025 if (cp < ls_end (&line))
1030 dfm_forward_columns (dls->reader, field->string - line.string);
1031 column_start = dfm_column_start (dls->reader);
1033 dfm_forward_columns (dls->reader, cp - field->string);
1035 return column_start;
1038 typedef int data_list_read_func (const struct data_list_pgm *, struct ccase *);
1039 static data_list_read_func read_from_data_list_fixed;
1040 static data_list_read_func read_from_data_list_free;
1041 static data_list_read_func read_from_data_list_list;
1043 /* Returns the proper function to read the kind of DATA LIST
1044 data specified by DLS. */
1045 static data_list_read_func *
1046 get_data_list_read_func (const struct data_list_pgm *dls)
1051 return read_from_data_list_fixed;
1054 return read_from_data_list_free;
1057 return read_from_data_list_list;
1065 /* Reads a case from the data file into C, parsing it according
1066 to fixed-format syntax rules in DLS. Returns -1 on success,
1067 -2 at end of file. */
1069 read_from_data_list_fixed (const struct data_list_pgm *dls,
1072 struct dls_var_spec *var_spec = dls->first;
1075 if (dfm_eof (dls->reader))
1077 for (i = 1; i <= dls->rec_cnt; i++)
1079 struct fixed_string line;
1081 if (dfm_eof (dls->reader))
1083 /* Note that this can't occur on the first record. */
1084 msg (SW, _("Partial case of %d of %d records discarded."),
1085 i - 1, dls->rec_cnt);
1088 dfm_expand_tabs (dls->reader);
1089 dfm_get_record (dls->reader, &line);
1091 for (; var_spec && i == var_spec->rec; var_spec = var_spec->next)
1095 data_in_finite_line (&di, ls_c_str (&line), ls_length (&line),
1096 var_spec->fc, var_spec->lc);
1097 di.v = case_data_rw (c, var_spec->fv);
1098 di.flags = DI_IMPLIED_DECIMALS;
1099 di.f1 = var_spec->fc;
1100 di.format = var_spec->input;
1105 dfm_forward_record (dls->reader);
1111 /* Reads a case from the data file into C, parsing it according
1112 to free-format syntax rules in DLS. Returns -1 on success,
1113 -2 at end of file. */
1115 read_from_data_list_free (const struct data_list_pgm *dls,
1118 struct dls_var_spec *var_spec;
1121 for (var_spec = dls->first; var_spec; var_spec = var_spec->next)
1123 struct fixed_string field;
1126 /* Cut out a field and read in a new record if necessary. */
1129 column = cut_field (dls, &field, &end_blank);
1133 if (!dfm_eof (dls->reader))
1134 dfm_forward_record (dls->reader);
1135 if (dfm_eof (dls->reader))
1137 if (var_spec != dls->first)
1138 msg (SW, _("Partial case discarded. The first variable "
1139 "missing was %s."), var_spec->name);
1147 di.s = ls_c_str (&field);
1148 di.e = ls_end (&field);
1149 di.v = case_data_rw (c, var_spec->fv);
1152 di.format = var_spec->input;
1159 /* Reads a case from the data file and parses it according to
1160 list-format syntax rules. Returns -1 on success, -2 at end of
1163 read_from_data_list_list (const struct data_list_pgm *dls,
1166 struct dls_var_spec *var_spec;
1169 if (dfm_eof (dls->reader))
1172 for (var_spec = dls->first; var_spec; var_spec = var_spec->next)
1174 struct fixed_string field;
1177 /* Cut out a field and check for end-of-line. */
1178 column = cut_field (dls, &field, &end_blank);
1181 if (get_undefined ())
1182 msg (SW, _("Missing value(s) for all variables from %s onward. "
1183 "These will be filled with the system-missing value "
1184 "or blanks, as appropriate."),
1186 for (; var_spec; var_spec = var_spec->next)
1188 int width = get_format_var_width (&var_spec->input);
1190 case_data_rw (c, var_spec->fv)->f = SYSMIS;
1192 memset (case_data_rw (c, var_spec->fv)->s, ' ', width);
1200 di.s = ls_c_str (&field);
1201 di.e = ls_end (&field);
1202 di.v = case_data_rw (c, var_spec->fv);
1205 di.format = var_spec->input;
1210 dfm_forward_record (dls->reader);
1214 /* Destroys SPEC. */
1216 destroy_dls_var_spec (struct dls_var_spec *spec)
1218 struct dls_var_spec *next;
1220 while (spec != NULL)
1228 /* Destroys DATA LIST transformation DLS. */
1230 data_list_trns_free (void *dls_)
1232 struct data_list_pgm *dls = dls_;
1234 destroy_dls_var_spec (dls->first);
1235 dfm_close_reader (dls->reader);
1239 /* Handle DATA LIST transformation DLS, parsing data into C. */
1241 data_list_trns_proc (void *dls_, struct ccase *c, int case_num UNUSED)
1243 struct data_list_pgm *dls = dls_;
1244 data_list_read_func *read_func;
1247 dfm_push (dls->reader);
1249 read_func = get_data_list_read_func (dls);
1250 retval = read_func (dls, c);
1252 /* Handle end of file. */
1255 /* If we already encountered end of file then this is an
1259 msg (SE, _("Attempt to read past end of file."));
1261 dfm_pop (dls->reader);
1265 /* Otherwise simply note it. */
1271 /* If there was an END subcommand handle it. */
1272 if (dls->end != NULL)
1276 case_data_rw (c, dls->end->fv)->f = 1.0;
1280 case_data_rw (c, dls->end->fv)->f = 0.0;
1283 dfm_pop (dls->reader);
1288 /* Reads all the records from the data file and passes them to
1291 data_list_source_read (struct case_source *source,
1293 write_case_func *write_case, write_case_data wc_data)
1295 struct data_list_pgm *dls = source->aux;
1296 data_list_read_func *read_func = get_data_list_read_func (dls);
1298 dfm_push (dls->reader);
1299 while (read_func (dls, c) != -2)
1300 if (!write_case (wc_data))
1302 dfm_pop (dls->reader);
1305 /* Destroys the source's internal data. */
1307 data_list_source_destroy (struct case_source *source)
1309 data_list_trns_free (source->aux);
1312 const struct case_source_class data_list_source_class =
1316 data_list_source_read,
1317 data_list_source_destroy,
1320 /* REPEATING DATA. */
1322 /* Represents a number or a variable. */
1323 struct rpd_num_or_var
1325 int num; /* Value, or 0. */
1326 struct variable *var; /* Variable, if number==0. */
1329 /* REPEATING DATA private data structure. */
1330 struct repeating_data_trns
1332 struct dls_var_spec *first, *last; /* Variable parsing specifications. */
1333 struct dfm_reader *reader; /* Input file, never NULL. */
1335 struct rpd_num_or_var starts_beg; /* STARTS=, before the dash. */
1336 struct rpd_num_or_var starts_end; /* STARTS=, after the dash. */
1337 struct rpd_num_or_var occurs; /* OCCURS= subcommand. */
1338 struct rpd_num_or_var length; /* LENGTH= subcommand. */
1339 struct rpd_num_or_var cont_beg; /* CONTINUED=, before the dash. */
1340 struct rpd_num_or_var cont_end; /* CONTINUED=, after the dash. */
1342 /* ID subcommand. */
1343 int id_beg, id_end; /* Beginning & end columns. */
1344 struct variable *id_var; /* DATA LIST variable. */
1345 struct fmt_spec id_spec; /* Input format spec. */
1346 union value *id_value; /* ID value. */
1348 write_case_func *write_case;
1349 write_case_data wc_data;
1352 static trns_free_func repeating_data_trns_free;
1353 static int parse_num_or_var (struct rpd_num_or_var *, const char *);
1354 static int parse_repeating_data (struct dls_var_spec **,
1355 struct dls_var_spec **);
1356 static void find_variable_input_spec (struct variable *v,
1357 struct fmt_spec *spec);
1359 /* Parses the REPEATING DATA command. */
1361 cmd_repeating_data (void)
1363 struct repeating_data_trns *rpd;
1364 int table = 1; /* Print table? */
1365 bool saw_starts = false; /* Saw STARTS subcommand? */
1366 bool saw_occurs = false; /* Saw OCCURS subcommand? */
1367 bool saw_length = false; /* Saw LENGTH subcommand? */
1368 bool saw_continued = false; /* Saw CONTINUED subcommand? */
1369 bool saw_id = false; /* Saw ID subcommand? */
1370 struct file_handle *const fh = default_handle;
1372 assert (case_source_is_complex (vfm_source));
1374 rpd = xmalloc (sizeof *rpd);
1375 rpd->reader = dfm_open_reader (default_handle);
1376 rpd->first = rpd->last = NULL;
1377 rpd->starts_beg.num = 0;
1378 rpd->starts_beg.var = NULL;
1379 rpd->starts_end = rpd->occurs = rpd->length = rpd->cont_beg
1380 = rpd->cont_end = rpd->starts_beg;
1381 rpd->id_beg = rpd->id_end = 0;
1383 rpd->id_value = NULL;
1389 if (lex_match_id ("FILE"))
1391 struct file_handle *file;
1398 msg (SE, _("REPEATING DATA must use the same file as its "
1399 "corresponding DATA LIST or FILE TYPE."));
1403 else if (lex_match_id ("STARTS"))
1408 msg (SE, _("%s subcommand given multiple times."),"STARTS");
1413 if (!parse_num_or_var (&rpd->starts_beg, "STARTS beginning column"))
1416 lex_negative_to_dash ();
1417 if (lex_match ('-'))
1419 if (!parse_num_or_var (&rpd->starts_end, "STARTS ending column"))
1422 /* Otherwise, rpd->starts_end is uninitialized. We
1423 will initialize it later from the record length
1424 of the file. We can't do so now because the
1425 file handle may not be specified yet. */
1428 if (rpd->starts_beg.num != 0 && rpd->starts_end.num != 0
1429 && rpd->starts_beg.num > rpd->starts_end.num)
1431 msg (SE, _("STARTS beginning column (%d) exceeds "
1432 "STARTS ending column (%d)."),
1433 rpd->starts_beg.num, rpd->starts_end.num);
1437 else if (lex_match_id ("OCCURS"))
1442 msg (SE, _("%s subcommand given multiple times."),"OCCURS");
1447 if (!parse_num_or_var (&rpd->occurs, "OCCURS"))
1450 else if (lex_match_id ("LENGTH"))
1455 msg (SE, _("%s subcommand given multiple times."),"LENGTH");
1460 if (!parse_num_or_var (&rpd->length, "LENGTH"))
1463 else if (lex_match_id ("CONTINUED"))
1468 msg (SE, _("%s subcommand given multiple times."),"CONTINUED");
1471 saw_continued = true;
1473 if (!lex_match ('/'))
1475 if (!parse_num_or_var (&rpd->cont_beg,
1476 "CONTINUED beginning column"))
1479 lex_negative_to_dash ();
1481 && !parse_num_or_var (&rpd->cont_end,
1482 "CONTINUED ending column"))
1485 if (rpd->cont_beg.num != 0 && rpd->cont_end.num != 0
1486 && rpd->cont_beg.num > rpd->cont_end.num)
1488 msg (SE, _("CONTINUED beginning column (%d) exceeds "
1489 "CONTINUED ending column (%d)."),
1490 rpd->cont_beg.num, rpd->cont_end.num);
1495 rpd->cont_beg.num = 1;
1497 else if (lex_match_id ("ID"))
1502 msg (SE, _("%s subcommand given multiple times."),"ID");
1507 if (!lex_force_int ())
1509 if (lex_integer () < 1)
1511 msg (SE, _("ID beginning column (%ld) must be positive."),
1515 rpd->id_beg = lex_integer ();
1518 lex_negative_to_dash ();
1520 if (lex_match ('-'))
1522 if (!lex_force_int ())
1524 if (lex_integer () < 1)
1526 msg (SE, _("ID ending column (%ld) must be positive."),
1530 if (lex_integer () < rpd->id_end)
1532 msg (SE, _("ID ending column (%ld) cannot be less than "
1533 "ID beginning column (%d)."),
1534 lex_integer (), rpd->id_beg);
1538 rpd->id_end = lex_integer ();
1541 else rpd->id_end = rpd->id_beg;
1543 if (!lex_force_match ('='))
1545 rpd->id_var = parse_variable ();
1546 if (rpd->id_var == NULL)
1549 find_variable_input_spec (rpd->id_var, &rpd->id_spec);
1550 rpd->id_value = xnmalloc (rpd->id_var->nv, sizeof *rpd->id_value);
1552 else if (lex_match_id ("TABLE"))
1554 else if (lex_match_id ("NOTABLE"))
1556 else if (lex_match_id ("DATA"))
1564 if (!lex_force_match ('/'))
1568 /* Comes here when DATA specification encountered. */
1569 if (!saw_starts || !saw_occurs)
1572 msg (SE, _("Missing required specification STARTS."));
1574 msg (SE, _("Missing required specification OCCURS."));
1578 /* Enforce ID restriction. */
1579 if (saw_id && !saw_continued)
1581 msg (SE, _("ID specified without CONTINUED."));
1585 /* Calculate and check starts_end, cont_end if necessary. */
1586 if (rpd->starts_end.num == 0 && rpd->starts_end.var == NULL)
1588 rpd->starts_end.num = fh != NULL ? handle_get_record_width (fh) : 80;
1589 if (rpd->starts_beg.num != 0
1590 && rpd->starts_beg.num > rpd->starts_end.num)
1592 msg (SE, _("STARTS beginning column (%d) exceeds "
1593 "default STARTS ending column taken from file's "
1594 "record width (%d)."),
1595 rpd->starts_beg.num, rpd->starts_end.num);
1599 if (rpd->cont_end.num == 0 && rpd->cont_end.var == NULL)
1601 rpd->cont_end.num = fh != NULL ? handle_get_record_width (fh) : 80;
1602 if (rpd->cont_beg.num != 0
1603 && rpd->cont_beg.num > rpd->cont_end.num)
1605 msg (SE, _("CONTINUED beginning column (%d) exceeds "
1606 "default CONTINUED ending column taken from file's "
1607 "record width (%d)."),
1608 rpd->cont_beg.num, rpd->cont_end.num);
1614 if (!parse_repeating_data (&rpd->first, &rpd->last))
1617 /* Calculate length if necessary. */
1620 struct dls_var_spec *iter;
1622 for (iter = rpd->first; iter; iter = iter->next)
1623 if (iter->lc > rpd->length.num)
1624 rpd->length.num = iter->lc;
1625 assert (rpd->length.num != 0);
1629 dump_fixed_table (rpd->first, fh, rpd->last->rec);
1631 add_transformation (repeating_data_trns_proc, repeating_data_trns_free, rpd);
1633 return lex_end_of_command ();
1636 repeating_data_trns_free (rpd);
1640 /* Finds the input format specification for variable V and puts
1641 it in SPEC. Because of the way that DATA LIST is structured,
1642 this is nontrivial. */
1644 find_variable_input_spec (struct variable *v, struct fmt_spec *spec)
1648 for (i = 0; i < n_trns; i++)
1650 struct transformation *trns = &t_trns[i];
1652 if (trns->proc == data_list_trns_proc)
1654 struct data_list_pgm *pgm = trns->private;
1655 struct dls_var_spec *iter;
1657 for (iter = pgm->first; iter; iter = iter->next)
1660 *spec = iter->input;
1669 /* Parses a number or a variable name from the syntax file and puts
1670 the results in VALUE. Ensures that the number is at least 1; else
1671 emits an error based on MESSAGE. Returns nonzero only if
1674 parse_num_or_var (struct rpd_num_or_var *value, const char *message)
1679 value->var = parse_variable ();
1680 if (value->var == NULL)
1682 if (value->var->type == ALPHA)
1684 msg (SE, _("String variable not allowed here."));
1688 else if (lex_is_integer ())
1690 value->num = lex_integer ();
1694 msg (SE, _("%s (%d) must be at least 1."), message, value->num);
1700 msg (SE, _("Variable or integer expected for %s."), message);
1706 /* Parses data specifications for repeating data groups, adding
1707 them to the linked list with head FIRST and tail LAST.
1708 Returns nonzero only if successful. */
1710 parse_repeating_data (struct dls_var_spec **first, struct dls_var_spec **last)
1712 struct fixed_parsing_state fx;
1718 while (token != '.')
1720 if (!parse_DATA_LIST_vars (&fx.name, &fx.name_cnt, PV_NONE))
1723 if (lex_is_number ())
1725 if (!fixed_parse_compatible (&fx, first, last))
1728 else if (token == '(')
1730 if (!fixed_parse_fortran (&fx, first, last))
1735 msg (SE, _("SPSS-like or FORTRAN-like format "
1736 "specification expected after variable names."));
1740 for (i = 0; i < fx.name_cnt; i++)
1748 for (i = 0; i < fx.name_cnt; i++)
1754 /* Obtains the real value for rpd_num_or_var N in case C and returns
1755 it. The valid range is nonnegative numbers, but numbers outside
1756 this range can be returned and should be handled by the caller as
1759 realize_value (struct rpd_num_or_var *n, struct ccase *c)
1763 double v = case_num (c, n->var->fv);
1764 return v != SYSMIS && v >= INT_MIN && v <= INT_MAX ? v : -1;
1770 /* Parameter record passed to rpd_parse_record(). */
1771 struct rpd_parse_info
1773 struct repeating_data_trns *trns; /* REPEATING DATA transformation. */
1774 const char *line; /* Line being parsed. */
1775 size_t len; /* Line length. */
1776 int beg, end; /* First and last column of first occurrence. */
1777 int ofs; /* Column offset between repeated occurrences. */
1778 struct ccase *c; /* Case to fill in. */
1779 int verify_id; /* Zero to initialize ID, nonzero to verify it. */
1780 int max_occurs; /* Max number of occurrences to parse. */
1783 /* Parses one record of repeated data and outputs corresponding
1784 cases. Returns number of occurrences parsed up to the
1785 maximum specified in INFO. */
1787 rpd_parse_record (const struct rpd_parse_info *info)
1789 struct repeating_data_trns *t = info->trns;
1790 int cur = info->beg;
1793 /* Handle record ID values. */
1796 union value id_temp[MAX_ELEMS_PER_VALUE];
1798 /* Parse record ID into V. */
1802 data_in_finite_line (&di, info->line, info->len, t->id_beg, t->id_end);
1803 di.v = info->verify_id ? id_temp : t->id_value;
1806 di.format = t->id_spec;
1813 && compare_values (id_temp, t->id_value, t->id_var->width) != 0)
1815 char expected_str [MAX_FORMATTED_LEN + 1];
1816 char actual_str [MAX_FORMATTED_LEN + 1];
1818 data_out (expected_str, &t->id_var->print, t->id_value);
1819 expected_str[t->id_var->print.w] = '\0';
1821 data_out (actual_str, &t->id_var->print, id_temp);
1822 actual_str[t->id_var->print.w] = '\0';
1825 _("Encountered mismatched record ID \"%s\" expecting \"%s\"."),
1826 actual_str, expected_str);
1832 /* Iterate over the set of expected occurrences and record each of
1833 them as a separate case. FIXME: We need to execute any
1834 transformations that follow the current one. */
1838 for (occurrences = 0; occurrences < info->max_occurs; )
1840 if (cur + info->ofs > info->end + 1)
1845 struct dls_var_spec *var_spec = t->first;
1847 for (; var_spec; var_spec = var_spec->next)
1849 int fc = var_spec->fc - 1 + cur;
1850 int lc = var_spec->lc - 1 + cur;
1852 if (fc > info->len && !warned && var_spec->input.type != FMT_A)
1857 _("Variable %s starting in column %d extends "
1858 "beyond physical record length of %d."),
1859 var_spec->v->name, fc, info->len);
1865 data_in_finite_line (&di, info->line, info->len, fc, lc);
1866 di.v = case_data_rw (info->c, var_spec->fv);
1869 di.format = var_spec->input;
1879 if (!t->write_case (t->wc_data))
1887 /* Reads one set of repetitions of the elements in the REPEATING
1888 DATA structure. Returns -1 on success, -2 on end of file or
1891 repeating_data_trns_proc (void *trns_, struct ccase *c, int case_num UNUSED)
1893 struct repeating_data_trns *t = trns_;
1895 struct fixed_string line; /* Current record. */
1897 int starts_beg; /* Starting column. */
1898 int starts_end; /* Ending column. */
1899 int occurs; /* Number of repetitions. */
1900 int length; /* Length of each occurrence. */
1901 int cont_beg; /* Starting column for continuation lines. */
1902 int cont_end; /* Ending column for continuation lines. */
1904 int occurs_left; /* Number of occurrences remaining. */
1906 int code; /* Return value from rpd_parse_record(). */
1908 int skip_first_record = 0;
1910 dfm_push (t->reader);
1912 /* Read the current record. */
1913 dfm_reread_record (t->reader, 1);
1914 dfm_expand_tabs (t->reader);
1915 if (dfm_eof (t->reader))
1917 dfm_get_record (t->reader, &line);
1918 dfm_forward_record (t->reader);
1920 /* Calculate occurs, length. */
1921 occurs_left = occurs = realize_value (&t->occurs, c);
1924 tmsg (SE, RPD_ERR, _("Invalid value %d for OCCURS."), occurs);
1927 starts_beg = realize_value (&t->starts_beg, c);
1928 if (starts_beg <= 0)
1930 tmsg (SE, RPD_ERR, _("Beginning column for STARTS (%d) must be "
1935 starts_end = realize_value (&t->starts_end, c);
1936 if (starts_end < starts_beg)
1938 tmsg (SE, RPD_ERR, _("Ending column for STARTS (%d) is less than "
1939 "beginning column (%d)."),
1940 starts_end, starts_beg);
1941 skip_first_record = 1;
1943 length = realize_value (&t->length, c);
1946 tmsg (SE, RPD_ERR, _("Invalid value %d for LENGTH."), length);
1948 occurs = occurs_left = 1;
1950 cont_beg = realize_value (&t->cont_beg, c);
1953 tmsg (SE, RPD_ERR, _("Beginning column for CONTINUED (%d) must be "
1958 cont_end = realize_value (&t->cont_end, c);
1959 if (cont_end < cont_beg)
1961 tmsg (SE, RPD_ERR, _("Ending column for CONTINUED (%d) is less than "
1962 "beginning column (%d)."),
1963 cont_end, cont_beg);
1967 /* Parse the first record. */
1968 if (!skip_first_record)
1970 struct rpd_parse_info info;
1972 info.line = ls_c_str (&line);
1973 info.len = ls_length (&line);
1974 info.beg = starts_beg;
1975 info.end = starts_end;
1979 info.max_occurs = occurs_left;
1980 code = rpd_parse_record (&info);
1983 occurs_left -= code;
1985 else if (cont_beg == 0)
1988 /* Make sure, if some occurrences are left, that we have
1989 continuation records. */
1990 if (occurs_left > 0 && cont_beg == 0)
1993 _("Number of repetitions specified on OCCURS (%d) "
1994 "exceed number of repetitions available in "
1995 "space on STARTS (%d), and CONTINUED not specified."),
1996 occurs, (starts_end - starts_beg + 1) / length);
2000 /* Go on to additional records. */
2001 while (occurs_left != 0)
2003 struct rpd_parse_info info;
2005 assert (occurs_left >= 0);
2007 /* Read in another record. */
2008 if (dfm_eof (t->reader))
2011 _("Unexpected end of file with %d repetitions "
2012 "remaining out of %d."),
2013 occurs_left, occurs);
2016 dfm_expand_tabs (t->reader);
2017 dfm_get_record (t->reader, &line);
2018 dfm_forward_record (t->reader);
2020 /* Parse this record. */
2022 info.line = ls_c_str (&line);
2023 info.len = ls_length (&line);
2024 info.beg = cont_beg;
2025 info.end = cont_end;
2029 info.max_occurs = occurs_left;
2030 code = rpd_parse_record (&info);;
2033 occurs_left -= code;
2036 dfm_pop (t->reader);
2038 /* FIXME: This is a kluge until we've implemented multiplexing of
2043 /* Frees a REPEATING DATA transformation. */
2045 repeating_data_trns_free (void *rpd_)
2047 struct repeating_data_trns *rpd = rpd_;
2049 destroy_dls_var_spec (rpd->first);
2050 dfm_close_reader (rpd->reader);
2051 free (rpd->id_value);
2055 /* Lets repeating_data_trns_proc() know how to write the cases
2056 that it composes. Not elegant. */
2058 repeating_data_set_write_case (struct transformation *trns_,
2059 write_case_func *write_case,
2060 write_case_data wc_data)
2062 struct repeating_data_trns *t = trns_->private;
2064 assert (trns_->proc == repeating_data_trns_proc);
2065 t->write_case = write_case;
2066 t->wc_data = wc_data;