1 /* PSPP - computes sample statistics.
2 Copyright (C) 1997-9, 2000 Free Software Foundation, Inc.
3 Written by Ben Pfaff <blp@gnu.org>.
5 This program is free software; you can redistribute it and/or
6 modify it under the terms of the GNU General Public License as
7 published by the Free Software Foundation; either version 2 of the
8 License, or (at your option) any later version.
10 This program is distributed in the hope that it will be useful, but
11 WITHOUT ANY WARRANTY; without even the implied warranty of
12 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
13 General Public License for more details.
15 You should have received a copy of the GNU General Public License
16 along with this program; if not, write to the Free Software
17 Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
21 #include "data-list.h"
31 #include "debug-print.h"
33 #include "dictionary.h"
35 #include "file-handle.h"
45 /* Utility function. */
47 /* FIXME: Either REPEATING DATA must be the last transformation, or we
48 must multiplex the transformations that follow (i.e., perform them
49 for every case that we produce from a repetition instance).
50 Currently we do neither. We should do one or the other. */
52 /* Describes how to parse one variable. */
55 struct dls_var_spec *next; /* Next specification in list. */
57 /* Both free and fixed formats. */
58 struct fmt_spec input; /* Input format of this field. */
59 struct variable *v; /* Associated variable. Used only in
60 parsing. Not safe later. */
61 int fv; /* First value in case. */
63 /* Fixed format only. */
64 int rec; /* Record number (1-based). */
65 int fc, lc; /* Column numbers in record. */
67 /* Free format only. */
68 char name[LONG_NAME_LEN + 1]; /* Name of variable. */
71 /* Constants for DATA LIST type. */
72 /* Must match table in cmd_data_list(). */
80 /* DATA LIST private data structure. */
85 struct dls_var_spec *first, *last; /* Variable parsing specifications. */
86 struct dfm_reader *reader; /* Data file reader. */
88 int type; /* A DLS_* constant. */
89 struct variable *end; /* Variable specified on END subcommand. */
90 int eof; /* End of file encountered. */
91 int rec_cnt; /* Number of records. */
92 size_t case_size; /* Case size in bytes. */
93 char *delims; /* Delimiters if any; not null-terminated. */
94 size_t delim_cnt; /* Number of delimiter, or 0 for spaces. */
97 static int parse_fixed (struct data_list_pgm *);
98 static int parse_free (struct dls_var_spec **, struct dls_var_spec **);
99 static void dump_fixed_table (const struct dls_var_spec *,
100 const struct file_handle *, int rec_cnt);
101 static void dump_free_table (const struct data_list_pgm *,
102 const struct file_handle *);
103 static void destroy_dls_var_spec (struct dls_var_spec *);
104 static trns_free_func data_list_trns_free;
105 static trns_proc_func data_list_trns_proc;
107 /* Message title for REPEATING DATA. */
108 #define RPD_ERR "REPEATING DATA: "
113 struct data_list_pgm *dls; /* DATA LIST program under construction. */
114 int table = -1; /* Print table if nonzero, -1=undecided. */
115 struct file_handle *fh = NULL; /* File handle of source, NULL=inline file. */
117 if (!case_source_is_complex (vfm_source))
118 discard_variables ();
120 dls = xmalloc (sizeof *dls);
128 dls->first = dls->last = NULL;
132 if (lex_match_id ("FILE"))
138 if (case_source_is_class (vfm_source, &file_type_source_class)
139 && fh != default_handle)
141 msg (SE, _("DATA LIST may not use a different file from "
142 "that specified on its surrounding FILE TYPE."));
146 else if (lex_match_id ("RECORDS"))
150 if (!lex_force_int ())
152 dls->rec_cnt = lex_integer ();
156 else if (lex_match_id ("END"))
160 msg (SE, _("The END subcommand may only be specified once."));
165 if (!lex_force_id ())
167 dls->end = dict_lookup_var (default_dict, tokid);
169 dls->end = dict_create_var_assert (default_dict, tokid, 0);
172 else if (token == T_ID)
174 if (lex_match_id ("NOTABLE"))
176 else if (lex_match_id ("TABLE"))
181 if (lex_match_id ("FIXED"))
183 else if (lex_match_id ("FREE"))
185 else if (lex_match_id ("LIST"))
195 msg (SE, _("Only one of FIXED, FREE, or LIST may "
201 if ((dls->type == DLS_FREE || dls->type == DLS_LIST)
204 while (!lex_match (')'))
208 if (lex_match_id ("TAB"))
210 else if (token == T_STRING && tokstr.length == 1)
212 delim = tokstr.string[0];
221 dls->delims = xrealloc (dls->delims, dls->delim_cnt + 1);
222 dls->delims[dls->delim_cnt++] = delim;
236 dls->case_size = dict_get_case_size (default_dict);
240 dls->type = DLS_FIXED;
244 if (dls->type == DLS_FREE)
250 if (dls->type == DLS_FIXED)
252 if (!parse_fixed (dls))
255 dump_fixed_table (dls->first, fh, dls->rec_cnt);
259 if (!parse_free (&dls->first, &dls->last))
262 dump_free_table (dls, fh);
265 dls->reader = dfm_open_reader (fh);
266 if (dls->reader == NULL)
269 if (vfm_source != NULL)
271 dls->h.proc = data_list_trns_proc;
272 dls->h.free = data_list_trns_free;
273 add_transformation (&dls->h);
276 vfm_source = create_case_source (&data_list_source_class, dls);
281 destroy_dls_var_spec (dls->first);
287 /* Adds SPEC to the linked list with head at FIRST and tail at
290 append_var_spec (struct dls_var_spec **first, struct dls_var_spec **last,
291 struct dls_var_spec *spec)
298 (*last)->next = spec;
302 /* Fixed-format parsing. */
304 /* Used for chaining together fortran-like format specifiers. */
307 struct fmt_list *next;
310 struct fmt_list *down;
313 /* State of parsing DATA LIST. */
314 struct fixed_parsing_state
316 char **name; /* Variable names. */
317 int name_cnt; /* Number of names. */
319 int recno; /* Index of current record. */
320 int sc; /* 1-based column number of starting column for
321 next field to output. */
324 static int fixed_parse_compatible (struct fixed_parsing_state *,
325 struct dls_var_spec **,
326 struct dls_var_spec **);
327 static int fixed_parse_fortran (struct fixed_parsing_state *,
328 struct dls_var_spec **,
329 struct dls_var_spec **);
331 /* Parses all the variable specifications for DATA LIST FIXED,
332 storing them into DLS. Returns nonzero if successful. */
334 parse_fixed (struct data_list_pgm *dls)
336 struct fixed_parsing_state fx;
344 while (lex_match ('/'))
347 if (lex_is_integer ())
349 if (lex_integer () < fx.recno)
351 msg (SE, _("The record number specified, %ld, is "
352 "before the previous record, %d. Data "
353 "fields must be listed in order of "
354 "increasing record number."),
355 lex_integer (), fx.recno - 1);
359 fx.recno = lex_integer ();
365 if (!parse_DATA_LIST_vars (&fx.name, &fx.name_cnt, PV_NONE))
368 if (lex_is_number ())
370 if (!fixed_parse_compatible (&fx, &dls->first, &dls->last))
373 else if (token == '(')
375 if (!fixed_parse_fortran (&fx, &dls->first, &dls->last))
380 msg (SE, _("SPSS-like or FORTRAN-like format "
381 "specification expected after variable names."));
385 for (i = 0; i < fx.name_cnt; i++)
389 if (dls->first == NULL)
391 msg (SE, _("At least one variable must be specified."));
394 if (dls->rec_cnt && dls->last->rec > dls->rec_cnt)
396 msg (SE, _("Variables are specified on records that "
397 "should not exist according to RECORDS subcommand."));
400 else if (!dls->rec_cnt)
401 dls->rec_cnt = dls->last->rec;
402 return lex_end_of_command () == CMD_SUCCESS;
405 for (i = 0; i < fx.name_cnt; i++)
411 /* Parses a variable specification in the form 1-10 (A) based on
412 FX and adds specifications to the linked list with head at
413 FIRST and tail at LAST. */
415 fixed_parse_compatible (struct fixed_parsing_state *fx,
416 struct dls_var_spec **first, struct dls_var_spec **last)
418 struct fmt_spec input;
424 if (!lex_force_int ())
429 msg (SE, _("Column positions for fields must be positive."));
435 lex_negative_to_dash ();
438 if (!lex_force_int ())
443 msg (SE, _("Column positions for fields must be positive."));
448 msg (SE, _("The ending column for a field must be "
449 "greater than the starting column."));
458 /* Divide columns evenly. */
459 input.w = (lc - fc + 1) / fx->name_cnt;
460 if ((lc - fc + 1) % fx->name_cnt)
462 msg (SE, _("The %d columns %d-%d "
463 "can't be evenly divided into %d fields."),
464 lc - fc + 1, fc, lc, fx->name_cnt);
468 /* Format specifier. */
471 struct fmt_desc *fdp;
477 input.type = parse_format_specifier_name (&cp, 0);
478 if (input.type == -1)
482 msg (SE, _("A format specifier on this line "
483 "has extra characters on the end."));
493 if (lex_is_integer ())
495 if (lex_integer () < 1)
497 msg (SE, _("The value for number of decimal places "
498 "must be at least 1."));
502 input.d = lex_integer ();
508 fdp = &formats[input.type];
509 if (fdp->n_args < 2 && input.d)
511 msg (SE, _("Input format %s doesn't accept decimal places."),
519 if (!lex_force_match (')'))
527 if (!check_input_specifier (&input, 1))
530 /* Start column for next specification. */
533 /* Width of variables to create. */
534 if (input.type == FMT_A || input.type == FMT_AHEX)
539 /* Create variables and var specs. */
540 for (i = 0; i < fx->name_cnt; i++)
542 struct dls_var_spec *spec;
545 v = dict_create_var (default_dict, fx->name[i], width);
548 convert_fmt_ItoO (&input, &v->print);
550 if (!case_source_is_complex (vfm_source))
555 v = dict_lookup_var_assert (default_dict, fx->name[i]);
556 if (vfm_source == NULL)
558 msg (SE, _("%s is a duplicate variable name."), fx->name[i]);
561 if ((width != 0) != (v->width != 0))
563 msg (SE, _("There is already a variable %s of a "
568 if (width != 0 && width != v->width)
570 msg (SE, _("There is already a string variable %s of a "
571 "different width."), fx->name[i]);
576 spec = xmalloc (sizeof *spec);
580 spec->rec = fx->recno;
581 spec->fc = fc + input.w * i;
582 spec->lc = spec->fc + input.w - 1;
583 append_var_spec (first, last, spec);
588 /* Destroy format list F and, if RECURSE is nonzero, all its
591 destroy_fmt_list (struct fmt_list *f, int recurse)
593 struct fmt_list *next;
598 if (recurse && f->f.type == FMT_DESCEND)
599 destroy_fmt_list (f->down, 1);
604 /* Takes a hierarchically structured fmt_list F as constructed by
605 fixed_parse_fortran(), and flattens it, adding the variable
606 specifications to the linked list with head FIRST and tail
607 LAST. NAME_IDX is used to take values from the list of names
608 in FX; it should initially point to a value of 0. */
610 dump_fmt_list (struct fixed_parsing_state *fx, struct fmt_list *f,
611 struct dls_var_spec **first, struct dls_var_spec **last,
616 for (; f; f = f->next)
617 if (f->f.type == FMT_X)
619 else if (f->f.type == FMT_T)
621 else if (f->f.type == FMT_NEWREC)
623 fx->recno += f->count;
627 for (i = 0; i < f->count; i++)
628 if (f->f.type == FMT_DESCEND)
630 if (!dump_fmt_list (fx, f->down, first, last, name_idx))
635 struct dls_var_spec *spec;
639 if (formats[f->f.type].cat & FCAT_STRING)
643 if (*name_idx >= fx->name_cnt)
645 msg (SE, _("The number of format "
646 "specifications exceeds the given number of "
651 v = dict_create_var (default_dict, fx->name[(*name_idx)++], width);
654 msg (SE, _("%s is a duplicate variable name."), fx->name[i]);
658 if (!case_source_is_complex (vfm_source))
661 spec = xmalloc (sizeof *spec);
665 spec->rec = fx->recno;
667 spec->lc = fx->sc + f->f.w - 1;
668 append_var_spec (first, last, spec);
670 convert_fmt_ItoO (&spec->input, &v->print);
678 /* Recursively parses a FORTRAN-like format specification into
679 the linked list with head FIRST and tail TAIL. LEVEL is the
680 level of recursion, starting from 0. Returns the parsed
681 specification if successful, or a null pointer on failure. */
682 static struct fmt_list *
683 fixed_parse_fortran_internal (struct fixed_parsing_state *fx,
684 struct dls_var_spec **first,
685 struct dls_var_spec **last)
687 struct fmt_list *head = NULL;
688 struct fmt_list *tail = NULL;
690 lex_force_match ('(');
694 struct fmt_list *new = xmalloc (sizeof *new);
697 /* Append new to list. */
705 if (lex_is_integer ())
707 new->count = lex_integer ();
713 /* Parse format specifier. */
716 new->f.type = FMT_DESCEND;
717 new->down = fixed_parse_fortran_internal (fx, first, last);
718 if (new->down == NULL)
721 else if (lex_match ('/'))
722 new->f.type = FMT_NEWREC;
723 else if (!parse_format_specifier (&new->f, FMTP_ALLOW_XT)
724 || !check_input_specifier (&new->f, 1))
729 lex_force_match (')');
734 destroy_fmt_list (head, 0);
739 /* Parses a FORTRAN-like format specification into the linked
740 list with head FIRST and tail LAST. Returns nonzero if
743 fixed_parse_fortran (struct fixed_parsing_state *fx,
744 struct dls_var_spec **first, struct dls_var_spec **last)
746 struct fmt_list *list;
749 list = fixed_parse_fortran_internal (fx, first, last);
754 dump_fmt_list (fx, list, first, last, &name_idx);
755 destroy_fmt_list (list, 1);
756 if (name_idx < fx->name_cnt)
758 msg (SE, _("There aren't enough format specifications "
759 "to match the number of variable names given."));
766 /* Displays a table giving information on fixed-format variable
767 parsing on DATA LIST. */
768 /* FIXME: The `Columns' column should be divided into three columns,
769 one for the starting column, one for the dash, one for the ending
770 column; then right-justify the starting column and left-justify the
773 dump_fixed_table (const struct dls_var_spec *specs,
774 const struct file_handle *fh, int rec_cnt)
776 const struct dls_var_spec *spec;
780 for (i = 0, spec = specs; spec; spec = spec->next)
782 t = tab_create (4, i + 1, 0);
783 tab_columns (t, TAB_COL_DOWN, 1);
784 tab_headers (t, 0, 0, 1, 0);
785 tab_text (t, 0, 0, TAB_CENTER | TAT_TITLE, _("Variable"));
786 tab_text (t, 1, 0, TAB_CENTER | TAT_TITLE, _("Record"));
787 tab_text (t, 2, 0, TAB_CENTER | TAT_TITLE, _("Columns"));
788 tab_text (t, 3, 0, TAB_CENTER | TAT_TITLE, _("Format"));
789 tab_box (t, TAL_1, TAL_1, TAL_0, TAL_1, 0, 0, 3, i);
790 tab_hline (t, TAL_2, 0, 3, 1);
791 tab_dim (t, tab_natural_dimensions);
793 for (i = 1, spec = specs; spec; spec = spec->next, i++)
795 tab_text (t, 0, i, TAB_LEFT, spec->v->name);
796 tab_text (t, 1, i, TAT_PRINTF, "%d", spec->rec);
797 tab_text (t, 2, i, TAT_PRINTF, "%3d-%3d",
799 tab_text (t, 3, i, TAB_LEFT | TAT_FIX,
800 fmt_to_string (&spec->input));
804 tab_title (t, 1, ngettext ("Reading %d record from file %s.",
805 "Reading %d records from file %s.", rec_cnt),
806 rec_cnt, handle_get_filename (fh));
808 tab_title (t, 1, ngettext ("Reading %d record from the command file.",
809 "Reading %d records from the command file.",
815 /* Free-format parsing. */
817 /* Parses variable specifications for DATA LIST FREE and adds
818 them to the linked list with head FIRST and tail LAST.
819 Returns nonzero only if successful. */
821 parse_free (struct dls_var_spec **first, struct dls_var_spec **last)
826 struct fmt_spec input, output;
832 if (!parse_DATA_LIST_vars (&name, &name_cnt, PV_NONE))
837 if (!parse_format_specifier (&input, 0)
838 || !check_input_specifier (&input, 1)
839 || !lex_force_match (')'))
841 for (i = 0; i < name_cnt; i++)
846 convert_fmt_ItoO (&input, &output);
851 input = make_input_format (FMT_F, 8, 0);
852 output = get_format ();
855 if (input.type == FMT_A || input.type == FMT_AHEX)
859 for (i = 0; i < name_cnt; i++)
861 struct dls_var_spec *spec;
864 v = dict_create_var (default_dict, name[i], width);
868 msg (SE, _("%s is a duplicate variable name."), name[i]);
871 v->print = v->write = output;
873 if (!case_source_is_complex (vfm_source))
876 spec = xmalloc (sizeof *spec);
880 str_copy_trunc (spec->name, sizeof spec->name, v->name);
881 append_var_spec (first, last, spec);
883 for (i = 0; i < name_cnt; i++)
888 return lex_end_of_command () == CMD_SUCCESS;
891 /* Displays a table giving information on free-format variable parsing
894 dump_free_table (const struct data_list_pgm *dls,
895 const struct file_handle *fh)
901 struct dls_var_spec *spec;
902 for (i = 0, spec = dls->first; spec; spec = spec->next)
906 t = tab_create (2, i + 1, 0);
907 tab_columns (t, TAB_COL_DOWN, 1);
908 tab_headers (t, 0, 0, 1, 0);
909 tab_text (t, 0, 0, TAB_CENTER | TAT_TITLE, _("Variable"));
910 tab_text (t, 1, 0, TAB_CENTER | TAT_TITLE, _("Format"));
911 tab_box (t, TAL_1, TAL_1, TAL_0, TAL_1, 0, 0, 1, i);
912 tab_hline (t, TAL_2, 0, 1, 1);
913 tab_dim (t, tab_natural_dimensions);
916 struct dls_var_spec *spec;
918 for (i = 1, spec = dls->first; spec; spec = spec->next, i++)
920 tab_text (t, 0, i, TAB_LEFT, spec->v->name);
921 tab_text (t, 1, i, TAB_LEFT | TAT_FIX, fmt_to_string (&spec->input));
926 tab_title (t, 1, _("Reading free-form data from file %s."),
927 handle_get_filename (fh));
929 tab_title (t, 1, _("Reading free-form data from the command file."));
934 /* Input procedure. */
936 /* Extracts a field from the current position in the current
937 record. Fields can be unquoted or quoted with single- or
938 double-quote characters. *FIELD is set to the field content.
939 After parsing the field, sets the current position in the
940 record to just past the field and any trailing delimiter.
941 END_BLANK is used internally; it should be initialized by the
942 caller to 0 and left alone afterward. Returns 0 on failure or
943 a 1-based column number indicating the beginning of the field
946 cut_field (const struct data_list_pgm *dls, struct fixed_string *field,
949 struct fixed_string line;
953 if (dfm_eof (dls->reader))
955 if (dls->delim_cnt == 0)
956 dfm_expand_tabs (dls->reader);
957 dfm_get_record (dls->reader, &line);
959 cp = ls_c_str (&line);
960 if (dls->delim_cnt == 0)
962 /* Skip leading whitespace. */
963 while (cp < ls_end (&line) && isspace ((unsigned char) *cp))
965 if (cp >= ls_end (&line))
968 /* Handle actual data, whether quoted or unquoted. */
969 if (*cp == '\'' || *cp == '"')
973 field->string = ++cp;
974 while (cp < ls_end (&line) && *cp != quote)
976 field->length = cp - field->string;
977 if (cp < ls_end (&line))
980 msg (SW, _("Quoted string missing terminating `%c'."), quote);
985 while (cp < ls_end (&line)
986 && !isspace ((unsigned char) *cp) && *cp != ',')
988 field->length = cp - field->string;
991 /* Skip trailing whitespace and a single comma if present. */
992 while (cp < ls_end (&line) && isspace ((unsigned char) *cp))
994 if (cp < ls_end (&line) && *cp == ',')
999 if (cp >= ls_end (&line))
1001 int column = dfm_column_start (dls->reader);
1002 /* A blank line or a line that ends in \t has a
1003 trailing blank field. */
1004 if (column == 1 || (column > 1 && cp[-1] == '\t'))
1006 if (*end_blank == 0)
1009 field->string = ls_end (&line);
1011 dfm_forward_record (dls->reader);
1026 while (cp < ls_end (&line)
1027 && memchr (dls->delims, *cp, dls->delim_cnt) == NULL)
1029 field->length = cp - field->string;
1030 if (cp < ls_end (&line))
1035 dfm_forward_columns (dls->reader, field->string - line.string);
1036 column_start = dfm_column_start (dls->reader);
1038 dfm_forward_columns (dls->reader, cp - field->string);
1040 return column_start;
1043 typedef int data_list_read_func (const struct data_list_pgm *, struct ccase *);
1044 static data_list_read_func read_from_data_list_fixed;
1045 static data_list_read_func read_from_data_list_free;
1046 static data_list_read_func read_from_data_list_list;
1048 /* Returns the proper function to read the kind of DATA LIST
1049 data specified by DLS. */
1050 static data_list_read_func *
1051 get_data_list_read_func (const struct data_list_pgm *dls)
1056 return read_from_data_list_fixed;
1059 return read_from_data_list_free;
1062 return read_from_data_list_list;
1070 /* Reads a case from the data file into C, parsing it according
1071 to fixed-format syntax rules in DLS. Returns -1 on success,
1072 -2 at end of file. */
1074 read_from_data_list_fixed (const struct data_list_pgm *dls,
1077 struct dls_var_spec *var_spec = dls->first;
1080 if (dfm_eof (dls->reader))
1082 for (i = 1; i <= dls->rec_cnt; i++)
1084 struct fixed_string line;
1086 if (dfm_eof (dls->reader))
1088 /* Note that this can't occur on the first record. */
1089 msg (SW, _("Partial case of %d of %d records discarded."),
1090 i - 1, dls->rec_cnt);
1093 dfm_expand_tabs (dls->reader);
1094 dfm_get_record (dls->reader, &line);
1096 for (; var_spec && i == var_spec->rec; var_spec = var_spec->next)
1100 data_in_finite_line (&di, ls_c_str (&line), ls_length (&line),
1101 var_spec->fc, var_spec->lc);
1102 di.v = case_data_rw (c, var_spec->fv);
1103 di.flags = DI_IMPLIED_DECIMALS;
1104 di.f1 = var_spec->fc;
1105 di.format = var_spec->input;
1110 dfm_forward_record (dls->reader);
1116 /* Reads a case from the data file into C, parsing it according
1117 to free-format syntax rules in DLS. Returns -1 on success,
1118 -2 at end of file. */
1120 read_from_data_list_free (const struct data_list_pgm *dls,
1123 struct dls_var_spec *var_spec;
1126 for (var_spec = dls->first; var_spec; var_spec = var_spec->next)
1128 struct fixed_string field;
1131 /* Cut out a field and read in a new record if necessary. */
1134 column = cut_field (dls, &field, &end_blank);
1138 if (!dfm_eof (dls->reader))
1139 dfm_forward_record (dls->reader);
1140 if (dfm_eof (dls->reader))
1142 if (var_spec != dls->first)
1143 msg (SW, _("Partial case discarded. The first variable "
1144 "missing was %s."), var_spec->name);
1152 di.s = ls_c_str (&field);
1153 di.e = ls_end (&field);
1154 di.v = case_data_rw (c, var_spec->fv);
1157 di.format = var_spec->input;
1164 /* Reads a case from the data file and parses it according to
1165 list-format syntax rules. Returns -1 on success, -2 at end of
1168 read_from_data_list_list (const struct data_list_pgm *dls,
1171 struct dls_var_spec *var_spec;
1174 if (dfm_eof (dls->reader))
1177 for (var_spec = dls->first; var_spec; var_spec = var_spec->next)
1179 struct fixed_string field;
1182 /* Cut out a field and check for end-of-line. */
1183 column = cut_field (dls, &field, &end_blank);
1186 if (get_undefined ())
1187 msg (SW, _("Missing value(s) for all variables from %s onward. "
1188 "These will be filled with the system-missing value "
1189 "or blanks, as appropriate."),
1191 for (; var_spec; var_spec = var_spec->next)
1193 int width = get_format_var_width (&var_spec->input);
1195 case_data_rw (c, var_spec->fv)->f = SYSMIS;
1197 memset (case_data_rw (c, var_spec->fv)->s, ' ', width);
1205 di.s = ls_c_str (&field);
1206 di.e = ls_end (&field);
1207 di.v = case_data_rw (c, var_spec->fv);
1210 di.format = var_spec->input;
1215 dfm_forward_record (dls->reader);
1219 /* Destroys SPEC. */
1221 destroy_dls_var_spec (struct dls_var_spec *spec)
1223 struct dls_var_spec *next;
1225 while (spec != NULL)
1233 /* Destroys DATA LIST transformation PGM. */
1235 data_list_trns_free (struct trns_header *pgm)
1237 struct data_list_pgm *dls = (struct data_list_pgm *) pgm;
1239 destroy_dls_var_spec (dls->first);
1240 dfm_close_reader (dls->reader);
1243 /* Handle DATA LIST transformation T, parsing data into C. */
1245 data_list_trns_proc (struct trns_header *t, struct ccase *c,
1246 int case_num UNUSED)
1248 struct data_list_pgm *dls = (struct data_list_pgm *) t;
1249 data_list_read_func *read_func;
1252 dfm_push (dls->reader);
1254 read_func = get_data_list_read_func (dls);
1255 retval = read_func (dls, c);
1257 /* Handle end of file. */
1260 /* If we already encountered end of file then this is an
1264 msg (SE, _("Attempt to read past end of file."));
1266 dfm_pop (dls->reader);
1270 /* Otherwise simply note it. */
1276 /* If there was an END subcommand handle it. */
1277 if (dls->end != NULL)
1281 case_data_rw (c, dls->end->fv)->f = 1.0;
1285 case_data_rw (c, dls->end->fv)->f = 0.0;
1288 dfm_pop (dls->reader);
1293 /* Reads all the records from the data file and passes them to
1296 data_list_source_read (struct case_source *source,
1298 write_case_func *write_case, write_case_data wc_data)
1300 struct data_list_pgm *dls = source->aux;
1301 data_list_read_func *read_func = get_data_list_read_func (dls);
1303 dfm_push (dls->reader);
1304 while (read_func (dls, c) != -2)
1305 if (!write_case (wc_data))
1307 dfm_pop (dls->reader);
1310 /* Destroys the source's internal data. */
1312 data_list_source_destroy (struct case_source *source)
1314 data_list_trns_free (source->aux);
1318 const struct case_source_class data_list_source_class =
1322 data_list_source_read,
1323 data_list_source_destroy,
1326 /* REPEATING DATA. */
1328 /* Represents a number or a variable. */
1329 struct rpd_num_or_var
1331 int num; /* Value, or 0. */
1332 struct variable *var; /* Variable, if number==0. */
1335 /* REPEATING DATA private data structure. */
1336 struct repeating_data_trns
1338 struct trns_header h;
1339 struct dls_var_spec *first, *last; /* Variable parsing specifications. */
1340 struct dfm_reader *reader; /* Input file, never NULL. */
1342 struct rpd_num_or_var starts_beg; /* STARTS=, before the dash. */
1343 struct rpd_num_or_var starts_end; /* STARTS=, after the dash. */
1344 struct rpd_num_or_var occurs; /* OCCURS= subcommand. */
1345 struct rpd_num_or_var length; /* LENGTH= subcommand. */
1346 struct rpd_num_or_var cont_beg; /* CONTINUED=, before the dash. */
1347 struct rpd_num_or_var cont_end; /* CONTINUED=, after the dash. */
1349 /* ID subcommand. */
1350 int id_beg, id_end; /* Beginning & end columns. */
1351 struct variable *id_var; /* DATA LIST variable. */
1352 struct fmt_spec id_spec; /* Input format spec. */
1353 union value *id_value; /* ID value. */
1355 write_case_func *write_case;
1356 write_case_data wc_data;
1359 static trns_free_func repeating_data_trns_free;
1360 static int parse_num_or_var (struct rpd_num_or_var *, const char *);
1361 static int parse_repeating_data (struct dls_var_spec **,
1362 struct dls_var_spec **);
1363 static void find_variable_input_spec (struct variable *v,
1364 struct fmt_spec *spec);
1366 /* Parses the REPEATING DATA command. */
1368 cmd_repeating_data (void)
1370 struct repeating_data_trns *rpd;
1371 int table = 1; /* Print table? */
1372 bool saw_starts = false; /* Saw STARTS subcommand? */
1373 bool saw_occurs = false; /* Saw OCCURS subcommand? */
1374 bool saw_length = false; /* Saw LENGTH subcommand? */
1375 bool saw_continued = false; /* Saw CONTINUED subcommand? */
1376 bool saw_id = false; /* Saw ID subcommand? */
1377 struct file_handle *const fh = default_handle;
1379 assert (case_source_is_complex (vfm_source));
1381 rpd = xmalloc (sizeof *rpd);
1382 rpd->reader = dfm_open_reader (default_handle);
1383 rpd->first = rpd->last = NULL;
1384 rpd->starts_beg.num = 0;
1385 rpd->starts_beg.var = NULL;
1386 rpd->starts_end = rpd->occurs = rpd->length = rpd->cont_beg
1387 = rpd->cont_end = rpd->starts_beg;
1388 rpd->id_beg = rpd->id_end = 0;
1390 rpd->id_value = NULL;
1396 if (lex_match_id ("FILE"))
1398 struct file_handle *file;
1405 msg (SE, _("REPEATING DATA must use the same file as its "
1406 "corresponding DATA LIST or FILE TYPE."));
1410 else if (lex_match_id ("STARTS"))
1415 msg (SE, _("%s subcommand given multiple times."),"STARTS");
1420 if (!parse_num_or_var (&rpd->starts_beg, "STARTS beginning column"))
1423 lex_negative_to_dash ();
1424 if (lex_match ('-'))
1426 if (!parse_num_or_var (&rpd->starts_end, "STARTS ending column"))
1429 /* Otherwise, rpd->starts_end is uninitialized. We
1430 will initialize it later from the record length
1431 of the file. We can't do so now because the
1432 file handle may not be specified yet. */
1435 if (rpd->starts_beg.num != 0 && rpd->starts_end.num != 0
1436 && rpd->starts_beg.num > rpd->starts_end.num)
1438 msg (SE, _("STARTS beginning column (%d) exceeds "
1439 "STARTS ending column (%d)."),
1440 rpd->starts_beg.num, rpd->starts_end.num);
1444 else if (lex_match_id ("OCCURS"))
1449 msg (SE, _("%s subcommand given multiple times."),"OCCURS");
1454 if (!parse_num_or_var (&rpd->occurs, "OCCURS"))
1457 else if (lex_match_id ("LENGTH"))
1462 msg (SE, _("%s subcommand given multiple times."),"LENGTH");
1467 if (!parse_num_or_var (&rpd->length, "LENGTH"))
1470 else if (lex_match_id ("CONTINUED"))
1473 if (saw_continued & 8)
1475 msg (SE, _("%s subcommand given multiple times."),"CONTINUED");
1480 if (!lex_match ('/'))
1482 if (!parse_num_or_var (&rpd->cont_beg,
1483 "CONTINUED beginning column"))
1486 lex_negative_to_dash ();
1488 && !parse_num_or_var (&rpd->cont_end,
1489 "CONTINUED ending column"))
1492 if (rpd->cont_beg.num != 0 && rpd->cont_end.num != 0
1493 && rpd->cont_beg.num > rpd->cont_end.num)
1495 msg (SE, _("CONTINUED beginning column (%d) exceeds "
1496 "CONTINUED ending column (%d)."),
1497 rpd->cont_beg.num, rpd->cont_end.num);
1502 rpd->cont_beg.num = 1;
1504 else if (lex_match_id ("ID"))
1509 msg (SE, _("%s subcommand given multiple times."),"ID");
1514 if (!lex_force_int ())
1516 if (lex_integer () < 1)
1518 msg (SE, _("ID beginning column (%ld) must be positive."),
1522 rpd->id_beg = lex_integer ();
1525 lex_negative_to_dash ();
1527 if (lex_match ('-'))
1529 if (!lex_force_int ())
1531 if (lex_integer () < 1)
1533 msg (SE, _("ID ending column (%ld) must be positive."),
1537 if (lex_integer () < rpd->id_end)
1539 msg (SE, _("ID ending column (%ld) cannot be less than "
1540 "ID beginning column (%d)."),
1541 lex_integer (), rpd->id_beg);
1545 rpd->id_end = lex_integer ();
1548 else rpd->id_end = rpd->id_beg;
1550 if (!lex_force_match ('='))
1552 rpd->id_var = parse_variable ();
1553 if (rpd->id_var == NULL)
1556 find_variable_input_spec (rpd->id_var, &rpd->id_spec);
1557 rpd->id_value = xmalloc (sizeof *rpd->id_value * rpd->id_var->nv);
1559 else if (lex_match_id ("TABLE"))
1561 else if (lex_match_id ("NOTABLE"))
1563 else if (lex_match_id ("DATA"))
1571 if (!lex_force_match ('/'))
1575 /* Comes here when DATA specification encountered. */
1576 if (!saw_starts || !saw_occurs)
1579 msg (SE, _("Missing required specification STARTS."));
1581 msg (SE, _("Missing required specification OCCURS."));
1585 /* Enforce ID restriction. */
1586 if (saw_id && !saw_continued)
1588 msg (SE, _("ID specified without CONTINUED."));
1592 /* Calculate and check starts_end, cont_end if necessary. */
1593 if (rpd->starts_end.num == 0 && rpd->starts_end.var == NULL)
1595 rpd->starts_end.num = fh != NULL ? handle_get_record_width (fh) : 80;
1596 if (rpd->starts_beg.num != 0
1597 && rpd->starts_beg.num > rpd->starts_end.num)
1599 msg (SE, _("STARTS beginning column (%d) exceeds "
1600 "default STARTS ending column taken from file's "
1601 "record width (%d)."),
1602 rpd->starts_beg.num, rpd->starts_end.num);
1606 if (rpd->cont_end.num == 0 && rpd->cont_end.var == NULL)
1608 rpd->cont_end.num = fh != NULL ? handle_get_record_width (fh) : 80;
1609 if (rpd->cont_beg.num != 0
1610 && rpd->cont_beg.num > rpd->cont_end.num)
1612 msg (SE, _("CONTINUED beginning column (%d) exceeds "
1613 "default CONTINUED ending column taken from file's "
1614 "record width (%d)."),
1615 rpd->cont_beg.num, rpd->cont_end.num);
1621 if (!parse_repeating_data (&rpd->first, &rpd->last))
1624 /* Calculate length if necessary. */
1627 struct dls_var_spec *iter;
1629 for (iter = rpd->first; iter; iter = iter->next)
1630 if (iter->lc > rpd->length.num)
1631 rpd->length.num = iter->lc;
1632 assert (rpd->length.num != 0);
1636 dump_fixed_table (rpd->first, fh, rpd->last->rec);
1638 rpd->h.proc = repeating_data_trns_proc;
1639 rpd->h.free = repeating_data_trns_free;
1640 add_transformation (&rpd->h);
1642 return lex_end_of_command ();
1645 destroy_dls_var_spec (rpd->first);
1646 free (rpd->id_value);
1650 /* Finds the input format specification for variable V and puts
1651 it in SPEC. Because of the way that DATA LIST is structured,
1652 this is nontrivial. */
1654 find_variable_input_spec (struct variable *v, struct fmt_spec *spec)
1658 for (i = 0; i < n_trns; i++)
1660 struct data_list_pgm *pgm = (struct data_list_pgm *) t_trns[i];
1662 if (pgm->h.proc == data_list_trns_proc)
1664 struct dls_var_spec *iter;
1666 for (iter = pgm->first; iter; iter = iter->next)
1669 *spec = iter->input;
1678 /* Parses a number or a variable name from the syntax file and puts
1679 the results in VALUE. Ensures that the number is at least 1; else
1680 emits an error based on MESSAGE. Returns nonzero only if
1683 parse_num_or_var (struct rpd_num_or_var *value, const char *message)
1688 value->var = parse_variable ();
1689 if (value->var == NULL)
1691 if (value->var->type == ALPHA)
1693 msg (SE, _("String variable not allowed here."));
1697 else if (lex_is_integer ())
1699 value->num = lex_integer ();
1703 msg (SE, _("%s (%d) must be at least 1."), message, value->num);
1709 msg (SE, _("Variable or integer expected for %s."), message);
1715 /* Parses data specifications for repeating data groups, adding
1716 them to the linked list with head FIRST and tail LAST.
1717 Returns nonzero only if successful. */
1719 parse_repeating_data (struct dls_var_spec **first, struct dls_var_spec **last)
1721 struct fixed_parsing_state fx;
1727 while (token != '.')
1729 if (!parse_DATA_LIST_vars (&fx.name, &fx.name_cnt, PV_NONE))
1732 if (lex_is_number ())
1734 if (!fixed_parse_compatible (&fx, first, last))
1737 else if (token == '(')
1739 if (!fixed_parse_fortran (&fx, first, last))
1744 msg (SE, _("SPSS-like or FORTRAN-like format "
1745 "specification expected after variable names."));
1749 for (i = 0; i < fx.name_cnt; i++)
1757 for (i = 0; i < fx.name_cnt; i++)
1763 /* Obtains the real value for rpd_num_or_var N in case C and returns
1764 it. The valid range is nonnegative numbers, but numbers outside
1765 this range can be returned and should be handled by the caller as
1768 realize_value (struct rpd_num_or_var *n, struct ccase *c)
1772 double v = case_num (c, n->var->fv);
1773 return v != SYSMIS && v >= INT_MIN && v <= INT_MAX ? v : -1;
1779 /* Parameter record passed to rpd_parse_record(). */
1780 struct rpd_parse_info
1782 struct repeating_data_trns *trns; /* REPEATING DATA transformation. */
1783 const char *line; /* Line being parsed. */
1784 size_t len; /* Line length. */
1785 int beg, end; /* First and last column of first occurrence. */
1786 int ofs; /* Column offset between repeated occurrences. */
1787 struct ccase *c; /* Case to fill in. */
1788 int verify_id; /* Zero to initialize ID, nonzero to verify it. */
1789 int max_occurs; /* Max number of occurrences to parse. */
1792 /* Parses one record of repeated data and outputs corresponding
1793 cases. Returns number of occurrences parsed up to the
1794 maximum specified in INFO. */
1796 rpd_parse_record (const struct rpd_parse_info *info)
1798 struct repeating_data_trns *t = info->trns;
1799 int cur = info->beg;
1802 /* Handle record ID values. */
1805 union value id_temp[MAX_ELEMS_PER_VALUE];
1807 /* Parse record ID into V. */
1811 data_in_finite_line (&di, info->line, info->len, t->id_beg, t->id_end);
1812 di.v = info->verify_id ? id_temp : t->id_value;
1815 di.format = t->id_spec;
1822 && compare_values (id_temp, t->id_value, t->id_var->width) != 0)
1824 char expected_str [MAX_FORMATTED_LEN + 1];
1825 char actual_str [MAX_FORMATTED_LEN + 1];
1827 data_out (expected_str, &t->id_var->print, t->id_value);
1828 expected_str[t->id_var->print.w] = '\0';
1830 data_out (actual_str, &t->id_var->print, id_temp);
1831 actual_str[t->id_var->print.w] = '\0';
1834 _("Encountered mismatched record ID \"%s\" expecting \"%s\"."),
1835 actual_str, expected_str);
1841 /* Iterate over the set of expected occurrences and record each of
1842 them as a separate case. FIXME: We need to execute any
1843 transformations that follow the current one. */
1847 for (occurrences = 0; occurrences < info->max_occurs; )
1849 if (cur + info->ofs > info->end + 1)
1854 struct dls_var_spec *var_spec = t->first;
1856 for (; var_spec; var_spec = var_spec->next)
1858 int fc = var_spec->fc - 1 + cur;
1859 int lc = var_spec->lc - 1 + cur;
1861 if (fc > info->len && !warned && var_spec->input.type != FMT_A)
1866 _("Variable %s starting in column %d extends "
1867 "beyond physical record length of %d."),
1868 var_spec->v->name, fc, info->len);
1874 data_in_finite_line (&di, info->line, info->len, fc, lc);
1875 di.v = case_data_rw (info->c, var_spec->fv);
1878 di.format = var_spec->input;
1888 if (!t->write_case (t->wc_data))
1896 /* Reads one set of repetitions of the elements in the REPEATING
1897 DATA structure. Returns -1 on success, -2 on end of file or
1900 repeating_data_trns_proc (struct trns_header *trns, struct ccase *c,
1901 int case_num UNUSED)
1903 struct repeating_data_trns *t = (struct repeating_data_trns *) trns;
1905 struct fixed_string line; /* Current record. */
1907 int starts_beg; /* Starting column. */
1908 int starts_end; /* Ending column. */
1909 int occurs; /* Number of repetitions. */
1910 int length; /* Length of each occurrence. */
1911 int cont_beg; /* Starting column for continuation lines. */
1912 int cont_end; /* Ending column for continuation lines. */
1914 int occurs_left; /* Number of occurrences remaining. */
1916 int code; /* Return value from rpd_parse_record(). */
1918 int skip_first_record = 0;
1920 dfm_push (t->reader);
1922 /* Read the current record. */
1923 dfm_reread_record (t->reader, 1);
1924 dfm_expand_tabs (t->reader);
1925 if (dfm_eof (t->reader))
1927 dfm_get_record (t->reader, &line);
1928 dfm_forward_record (t->reader);
1930 /* Calculate occurs, length. */
1931 occurs_left = occurs = realize_value (&t->occurs, c);
1934 tmsg (SE, RPD_ERR, _("Invalid value %d for OCCURS."), occurs);
1937 starts_beg = realize_value (&t->starts_beg, c);
1938 if (starts_beg <= 0)
1940 tmsg (SE, RPD_ERR, _("Beginning column for STARTS (%d) must be "
1945 starts_end = realize_value (&t->starts_end, c);
1946 if (starts_end < starts_beg)
1948 tmsg (SE, RPD_ERR, _("Ending column for STARTS (%d) is less than "
1949 "beginning column (%d)."),
1950 starts_end, starts_beg);
1951 skip_first_record = 1;
1953 length = realize_value (&t->length, c);
1956 tmsg (SE, RPD_ERR, _("Invalid value %d for LENGTH."), length);
1958 occurs = occurs_left = 1;
1960 cont_beg = realize_value (&t->cont_beg, c);
1963 tmsg (SE, RPD_ERR, _("Beginning column for CONTINUED (%d) must be "
1968 cont_end = realize_value (&t->cont_end, c);
1969 if (cont_end < cont_beg)
1971 tmsg (SE, RPD_ERR, _("Ending column for CONTINUED (%d) is less than "
1972 "beginning column (%d)."),
1973 cont_end, cont_beg);
1977 /* Parse the first record. */
1978 if (!skip_first_record)
1980 struct rpd_parse_info info;
1982 info.line = ls_c_str (&line);
1983 info.len = ls_length (&line);
1984 info.beg = starts_beg;
1985 info.end = starts_end;
1989 info.max_occurs = occurs_left;
1990 code = rpd_parse_record (&info);
1993 occurs_left -= code;
1995 else if (cont_beg == 0)
1998 /* Make sure, if some occurrences are left, that we have
1999 continuation records. */
2000 if (occurs_left > 0 && cont_beg == 0)
2003 _("Number of repetitions specified on OCCURS (%d) "
2004 "exceed number of repetitions available in "
2005 "space on STARTS (%d), and CONTINUED not specified."),
2006 occurs, (starts_end - starts_beg + 1) / length);
2010 /* Go on to additional records. */
2011 while (occurs_left != 0)
2013 struct rpd_parse_info info;
2015 assert (occurs_left >= 0);
2017 /* Read in another record. */
2018 if (dfm_eof (t->reader))
2021 _("Unexpected end of file with %d repetitions "
2022 "remaining out of %d."),
2023 occurs_left, occurs);
2026 dfm_expand_tabs (t->reader);
2027 dfm_get_record (t->reader, &line);
2028 dfm_forward_record (t->reader);
2030 /* Parse this record. */
2032 info.line = ls_c_str (&line);
2033 info.len = ls_length (&line);
2034 info.beg = cont_beg;
2035 info.end = cont_end;
2039 info.max_occurs = occurs_left;
2040 code = rpd_parse_record (&info);;
2043 occurs_left -= code;
2046 dfm_pop (t->reader);
2048 /* FIXME: This is a kluge until we've implemented multiplexing of
2053 /* Frees a REPEATING DATA transformation. */
2055 repeating_data_trns_free (struct trns_header *rpd_)
2057 struct repeating_data_trns *rpd = (struct repeating_data_trns *) rpd_;
2059 destroy_dls_var_spec (rpd->first);
2060 dfm_close_reader (rpd->reader);
2061 free (rpd->id_value);
2064 /* Lets repeating_data_trns_proc() know how to write the cases
2065 that it composes. Not elegant. */
2067 repeating_data_set_write_case (struct trns_header *trns,
2068 write_case_func *write_case,
2069 write_case_data wc_data)
2071 struct repeating_data_trns *t = (struct repeating_data_trns *) trns;
2073 assert (trns->proc == repeating_data_trns_proc);
2074 t->write_case = write_case;
2075 t->wc_data = wc_data;