X-Git-Url: https://pintos-os.org/cgi-bin/gitweb.cgi?a=blobdiff_plain;f=src%2Fdata-list.c;h=ab166f0d65585b9c876ed06333e46240240cbde0;hb=16aa47dbdde420fe82032f7d2e166fdf4e974df5;hp=11b7f9c7068a44ad5eac04f2f4854e48bfaaaba1;hpb=f2828f801736701c0294803b5dedd4c4ab63b45e;p=pspp-builds.git diff --git a/src/data-list.c b/src/data-list.c index 11b7f9c7..ab166f0d 100644 --- a/src/data-list.c +++ b/src/data-list.c @@ -14,21 +14,23 @@ You should have received a copy of the GNU General Public License along with this program; if not, write to the Free Software - Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA - 02111-1307, USA. */ + Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA + 02110-1301, USA. */ #include #include "data-list.h" -#include +#include "error.h" #include #include #include #include #include "alloc.h" +#include "case.h" #include "command.h" #include "data-in.h" #include "debug-print.h" -#include "dfm.h" +#include "dfm-read.h" +#include "dictionary.h" #include "error.h" #include "file-handle.h" #include "format.h" @@ -39,6 +41,9 @@ #include "tab.h" #include "var.h" #include "vfm.h" + +#include "gettext.h" +#define _(msgid) gettext (msgid) /* Utility function. */ @@ -63,7 +68,7 @@ struct dls_var_spec int fc, lc; /* Column numbers in record. */ /* Free format only. */ - char name[9]; /* Name of variable. */ + char name[LONG_NAME_LEN + 1]; /* Name of variable. */ }; /* Constants for DATA LIST type. */ @@ -78,25 +83,27 @@ enum /* DATA LIST private data structure. */ struct data_list_pgm { - struct trns_header h; struct dls_var_spec *first, *last; /* Variable parsing specifications. */ - struct file_handle *handle; /* Input file, never NULL. */ - /* Do not reorder preceding fields. */ + struct dfm_reader *reader; /* Data file reader. */ int type; /* A DLS_* constant. */ struct variable *end; /* Variable specified on END subcommand. */ int eof; /* End of file encountered. */ - int nrec; /* Number of records. */ + int rec_cnt; /* Number of records. */ + size_t case_size; /* Case size in bytes. */ + char *delims; /* Delimiters if any; not null-terminated. */ + size_t delim_cnt; /* Number of delimiter, or 0 for spaces. */ }; static int parse_fixed (struct data_list_pgm *); static int parse_free (struct dls_var_spec **, struct dls_var_spec **); -static void dump_fixed_table (const struct dls_var_spec *specs, - const struct file_handle *handle, int nrec); -static void dump_free_table (const struct data_list_pgm *); +static void dump_fixed_table (const struct dls_var_spec *, + const struct file_handle *, int rec_cnt); +static void dump_free_table (const struct data_list_pgm *, + const struct file_handle *); static void destroy_dls_var_spec (struct dls_var_spec *); -static trns_free_func destroy_dls; -static trns_proc_func read_one_case; +static trns_free_func data_list_trns_free; +static trns_proc_func data_list_trns_proc; /* Message title for REPEATING DATA. */ #define RPD_ERR "REPEATING DATA: " @@ -104,24 +111,21 @@ static trns_proc_func read_one_case; int cmd_data_list (void) { - /* DATA LIST program under construction. */ - struct data_list_pgm *dls; - - /* 0=print no table, 1=print table. (TABLE subcommand.) */ - int table = -1; - - lex_match_id ("DATA"); - lex_match_id ("LIST"); + struct data_list_pgm *dls; /* DATA LIST program under construction. */ + int table = -1; /* Print table if nonzero, -1=undecided. */ + struct file_handle *fh = NULL; /* File handle of source, NULL=inline file. */ if (!case_source_is_complex (vfm_source)) discard_variables (); dls = xmalloc (sizeof *dls); - dls->handle = default_handle; + dls->reader = NULL; dls->type = -1; dls->end = NULL; dls->eof = 0; - dls->nrec = 0; + dls->rec_cnt = 0; + dls->delims = NULL; + dls->delim_cnt = 0; dls->first = dls->last = NULL; while (token != '/') @@ -129,11 +133,11 @@ cmd_data_list (void) if (lex_match_id ("FILE")) { lex_match ('='); - dls->handle = fh_parse_file_handle (); - if (!dls->handle) + fh = fh_parse (); + if (fh == NULL) goto error; if (case_source_is_class (vfm_source, &file_type_source_class) - && dls->handle != default_handle) + && fh != default_handle) { msg (SE, _("DATA LIST may not use a different file from " "that specified on its surrounding FILE TYPE.")); @@ -146,7 +150,7 @@ cmd_data_list (void) lex_match ('('); if (!lex_force_int ()) goto error; - dls->nrec = lex_integer (); + dls->rec_cnt = lex_integer (); lex_get (); lex_match (')'); } @@ -168,38 +172,61 @@ cmd_data_list (void) } else if (token == T_ID) { - /* Must match DLS_* constants. */ - static const char *id[] = {"FIXED", "FREE", "LIST", "NOTABLE", - "TABLE", NULL}; - const char **p; - int index; - - for (p = id; *p; p++) - if (lex_id_match (*p, tokid)) - break; - if (*p == NULL) - { - lex_error (NULL); - goto error; - } - - lex_get (); + if (lex_match_id ("NOTABLE")) + table = 0; + else if (lex_match_id ("TABLE")) + table = 1; + else + { + int type; + if (lex_match_id ("FIXED")) + type = DLS_FIXED; + else if (lex_match_id ("FREE")) + type = DLS_FREE; + else if (lex_match_id ("LIST")) + type = DLS_LIST; + else + { + lex_error (NULL); + goto error; + } - index = p - id; - if (index < 3) - { if (dls->type != -1) { msg (SE, _("Only one of FIXED, FREE, or LIST may " - "be specified.")); + "be specified.")); goto error; } - - dls->type = index; - } - else - table = index - 3; - } + dls->type = type; + + if ((dls->type == DLS_FREE || dls->type == DLS_LIST) + && lex_match ('(')) + { + while (!lex_match (')')) + { + int delim; + + if (lex_match_id ("TAB")) + delim = '\t'; + else if (token == T_STRING && tokstr.length == 1) + { + delim = tokstr.string[0]; + lex_get(); + } + else + { + lex_error (NULL); + goto error; + } + + dls->delims = xrealloc (dls->delims, dls->delim_cnt + 1); + dls->delims[dls->delim_cnt++] = delim; + + lex_match (','); + } + } + } + } else { lex_error (NULL); @@ -207,7 +234,8 @@ cmd_data_list (void) } } - default_handle = dls->handle; + dls->case_size = dict_get_case_size (default_dict); + default_handle = fh; if (dls->type == -1) dls->type = DLS_FIXED; @@ -225,38 +253,34 @@ cmd_data_list (void) if (!parse_fixed (dls)) goto error; if (table) - dump_fixed_table (dls->first, dls->handle, dls->nrec); + dump_fixed_table (dls->first, fh, dls->rec_cnt); } else { if (!parse_free (&dls->first, &dls->last)) goto error; if (table) - dump_free_table (dls); + dump_free_table (dls, fh); } - if (vfm_source != NULL) - { - struct data_list_pgm *new_pgm; - - dls->h.proc = read_one_case; - dls->h.free = destroy_dls; + dls->reader = dfm_open_reader (fh); + if (dls->reader == NULL) + goto error; - new_pgm = xmalloc (sizeof *new_pgm); - memcpy (new_pgm, &dls, sizeof *new_pgm); - add_transformation (&new_pgm->h); - } + if (vfm_source != NULL) + add_transformation (data_list_trns_proc, data_list_trns_free, dls); else vfm_source = create_case_source (&data_list_source_class, dls); return CMD_SUCCESS; error: - destroy_dls_var_spec (dls->first); - free (dls); + data_list_trns_free (dls); return CMD_FAILURE; } +/* Adds SPEC to the linked list with head at FIRST and tail at + LAST. */ static void append_var_spec (struct dls_var_spec **first, struct dls_var_spec **last, struct dls_var_spec *spec) @@ -281,13 +305,11 @@ struct fmt_list struct fmt_list *down; }; -/* Used as "local" variables among the fixed-format parsing funcs. If - it were guaranteed that PSPP were going to be compiled by gcc, - I'd make all these functions a single set of nested functions. */ +/* State of parsing DATA LIST. */ struct fixed_parsing_state { char **name; /* Variable names. */ - int name_cnt; /* Number of names. */ + size_t name_cnt; /* Number of names. */ int recno; /* Index of current record. */ int sc; /* 1-based column number of starting column for @@ -301,11 +323,13 @@ static int fixed_parse_fortran (struct fixed_parsing_state *, struct dls_var_spec **, struct dls_var_spec **); +/* Parses all the variable specifications for DATA LIST FIXED, + storing them into DLS. Returns nonzero if successful. */ static int parse_fixed (struct data_list_pgm *dls) { struct fixed_parsing_state fx; - int i; + size_t i; fx.recno = 0; fx.sc = 1; @@ -315,7 +339,7 @@ parse_fixed (struct data_list_pgm *dls) while (lex_match ('/')) { fx.recno++; - if (lex_integer_p ()) + if (lex_is_integer ()) { if (lex_integer () < fx.recno) { @@ -336,7 +360,7 @@ parse_fixed (struct data_list_pgm *dls) if (!parse_DATA_LIST_vars (&fx.name, &fx.name_cnt, PV_NONE)) return 0; - if (token == T_NUM) + if (lex_is_number ()) { if (!fixed_parse_compatible (&fx, &dls->first, &dls->last)) goto fail; @@ -349,7 +373,7 @@ parse_fixed (struct data_list_pgm *dls) else { msg (SE, _("SPSS-like or FORTRAN-like format " - "specification expected after variable names.")); + "specification expected after variable names.")); goto fail; } @@ -362,20 +386,15 @@ parse_fixed (struct data_list_pgm *dls) msg (SE, _("At least one variable must be specified.")); return 0; } - if (dls->nrec && dls->last->rec > dls->nrec) + if (dls->rec_cnt && dls->last->rec > dls->rec_cnt) { msg (SE, _("Variables are specified on records that " "should not exist according to RECORDS subcommand.")); return 0; } - else if (!dls->nrec) - dls->nrec = dls->last->rec; - if (token != '.') - { - lex_error (_("expecting end of command")); - return 0; - } - return 1; + else if (!dls->rec_cnt) + dls->rec_cnt = dls->last->rec; + return lex_end_of_command () == CMD_SUCCESS; fail: for (i = 0; i < fx.name_cnt; i++) @@ -384,6 +403,9 @@ fail: return 0; } +/* Parses a variable specification in the form 1-10 (A) based on + FX and adds specifications to the linked list with head at + FIRST and tail at LAST. */ static int fixed_parse_compatible (struct fixed_parsing_state *fx, struct dls_var_spec **first, struct dls_var_spec **last) @@ -463,7 +485,7 @@ fixed_parse_compatible (struct fixed_parsing_state *fx, else input.type = FMT_F; - if (lex_integer_p ()) + if (lex_is_integer ()) { if (lex_integer () < 1) { @@ -497,7 +519,7 @@ fixed_parse_compatible (struct fixed_parsing_state *fx, input.type = FMT_F; input.d = 0; } - if (!check_input_specifier (&input)) + if (!check_input_specifier (&input, 1)) return 0; /* Start column for next specification. */ @@ -558,7 +580,8 @@ fixed_parse_compatible (struct fixed_parsing_state *fx, return 1; } -/* Destroy a format list and, optionally, all its sublists. */ +/* Destroy format list F and, if RECURSE is nonzero, all its + sublists. */ static void destroy_fmt_list (struct fmt_list *f, int recurse) { @@ -574,9 +597,10 @@ destroy_fmt_list (struct fmt_list *f, int recurse) } /* Takes a hierarchically structured fmt_list F as constructed by - fixed_parse_fortran(), and flattens it into a linear list of - dls_var_spec's. NAME_IDX is used to take values from the list - of names in FX; it should initially point to a value of 0. */ + fixed_parse_fortran(), and flattens it, adding the variable + specifications to the linked list with head FIRST and tail + LAST. NAME_IDX is used to take values from the list of names + in FX; it should initially point to a value of 0. */ static int dump_fmt_list (struct fixed_parsing_state *fx, struct fmt_list *f, struct dls_var_spec **first, struct dls_var_spec **last, @@ -646,10 +670,10 @@ dump_fmt_list (struct fixed_parsing_state *fx, struct fmt_list *f, return 1; } -/* Recursively parses a FORTRAN-like format specification. LEVEL - is the level of recursion, starting from 0. Returns the - parsed specification if successful, or a null pointer on - failure. */ +/* Recursively parses a FORTRAN-like format specification into + the linked list with head FIRST and tail TAIL. LEVEL is the + level of recursion, starting from 0. Returns the parsed + specification if successful, or a null pointer on failure. */ static struct fmt_list * fixed_parse_fortran_internal (struct fixed_parsing_state *fx, struct dls_var_spec **first, @@ -673,7 +697,7 @@ fixed_parse_fortran_internal (struct fixed_parsing_state *fx, tail = new; /* Parse count. */ - if (lex_integer_p ()) + if (lex_is_integer ()) { new->count = lex_integer (); lex_get (); @@ -691,8 +715,8 @@ fixed_parse_fortran_internal (struct fixed_parsing_state *fx, } else if (lex_match ('/')) new->f.type = FMT_NEWREC; - else if (!parse_format_specifier (&new->f, 1) - || !check_input_specifier (&new->f)) + else if (!parse_format_specifier (&new->f, FMTP_ALLOW_XT) + || !check_input_specifier (&new->f, 1)) goto fail; lex_match (','); @@ -707,8 +731,9 @@ fail: return NULL; } -/* Parses a FORTRAN-like format specification. Returns nonzero - if successful. */ +/* Parses a FORTRAN-like format specification into the linked + list with head FIRST and tail LAST. Returns nonzero if + successful. */ static int fixed_parse_fortran (struct fixed_parsing_state *fx, struct dls_var_spec **first, struct dls_var_spec **last) @@ -741,12 +766,10 @@ fixed_parse_fortran (struct fixed_parsing_state *fx, ending column. */ static void dump_fixed_table (const struct dls_var_spec *specs, - const struct file_handle *handle, int nrec) + const struct file_handle *fh, int rec_cnt) { const struct dls_var_spec *spec; struct tab_table *t; - char *buf; - const char *filename; int i; for (i = 0, spec = specs; spec; spec = spec->next) @@ -772,26 +795,23 @@ dump_fixed_table (const struct dls_var_spec *specs, fmt_to_string (&spec->input)); } - filename = fh_handle_name (handle); - if (filename == NULL) - filename = ""; - buf = local_alloc (strlen (filename) + INT_DIGITS + 80); - sprintf (buf, (handle != inline_file - ? ngettext ("Reading %d record from file %s.", - "Reading %d records from file %s.", nrec) - : ngettext ("Reading %d record from the command file.", - "Reading %d records from the command file.", - nrec)), - nrec, filename); - - tab_title (t, 0, buf); + if (fh != NULL) + tab_title (t, 1, ngettext ("Reading %d record from file %s.", + "Reading %d records from file %s.", rec_cnt), + rec_cnt, handle_get_filename (fh)); + else + tab_title (t, 1, ngettext ("Reading %d record from the command file.", + "Reading %d records from the command file.", + rec_cnt), + rec_cnt); tab_submit (t); - fh_handle_name (NULL); - local_free (buf); } /* Free-format parsing. */ +/* Parses variable specifications for DATA LIST FREE and adds + them to the linked list with head FIRST and tail LAST. + Returns nonzero only if successful. */ static int parse_free (struct dls_var_spec **first, struct dls_var_spec **last) { @@ -800,16 +820,17 @@ parse_free (struct dls_var_spec **first, struct dls_var_spec **last) { struct fmt_spec input, output; char **name; - int name_cnt; + size_t name_cnt; int width; - int i; + size_t i; if (!parse_DATA_LIST_vars (&name, &name_cnt, PV_NONE)) return 0; + if (lex_match ('(')) { if (!parse_format_specifier (&input, 0) - || !check_input_specifier (&input) + || !check_input_specifier (&input, 1) || !lex_force_match (')')) { for (i = 0; i < name_cnt; i++) @@ -822,10 +843,8 @@ parse_free (struct dls_var_spec **first, struct dls_var_spec **last) else { lex_match ('*'); - input.type = FMT_F; - input.w = 8; - input.d = 0; - output = set_format; + input = make_input_format (FMT_F, 8, 0); + output = *get_format (); } if (input.type == FMT_A || input.type == FMT_AHEX) @@ -838,6 +857,7 @@ parse_free (struct dls_var_spec **first, struct dls_var_spec **last) struct variable *v; v = dict_create_var (default_dict, name[i], width); + if (!v) { msg (SE, _("%s is a duplicate variable name."), name[i]); @@ -852,7 +872,7 @@ parse_free (struct dls_var_spec **first, struct dls_var_spec **last) spec->input = input; spec->v = v; spec->fv = v->fv; - strcpy (spec->name, name[i]); + str_copy_trunc (spec->name, sizeof spec->name, v->name); append_var_spec (first, last, spec); } for (i = 0; i < name_cnt; i++) @@ -860,15 +880,14 @@ parse_free (struct dls_var_spec **first, struct dls_var_spec **last) free (name); } - if (token != '.') - lex_error (_("expecting end of command")); - return 1; + return lex_end_of_command () == CMD_SUCCESS; } /* Displays a table giving information on free-format variable parsing on DATA LIST. */ static void -dump_free_table (const struct data_list_pgm *dls) +dump_free_table (const struct data_list_pgm *dls, + const struct file_handle *fh) { struct tab_table *t; int i; @@ -897,86 +916,126 @@ dump_free_table (const struct data_list_pgm *dls) tab_text (t, 1, i, TAB_LEFT | TAT_FIX, fmt_to_string (&spec->input)); } } - - { - const char *filename; - - filename = fh_handle_name (dls->handle); - if (filename == NULL) - filename = ""; - tab_title (t, 1, - (dls->handle != inline_file - ? _("Reading free-form data from file %s.") - : _("Reading free-form data from the command file.")), - filename); - } + + if (fh != NULL) + tab_title (t, 1, _("Reading free-form data from file %s."), + handle_get_filename (fh)); + else + tab_title (t, 1, _("Reading free-form data from the command file.")); tab_submit (t); - fh_handle_name (NULL); } /* Input procedure. */ -/* Extracts a field from the current position in the current record. - Fields can be unquoted or quoted with single- or double-quote - characters. *RET_LEN is set to the field length, *RET_CP is set to - the field itself. After parsing the field, sets the current - position in the record to just past the field. Returns 0 on - failure or a 1-based column number indicating the beginning of the - field on success. */ +/* Extracts a field from the current position in the current + record. Fields can be unquoted or quoted with single- or + double-quote characters. *FIELD is set to the field content. + After parsing the field, sets the current position in the + record to just past the field and any trailing delimiter. + END_BLANK is used internally; it should be initialized by the + caller to 0 and left alone afterward. Returns 0 on failure or + a 1-based column number indicating the beginning of the field + on success. */ static int -cut_field (const struct data_list_pgm *dls, char **ret_cp, int *ret_len) +cut_field (const struct data_list_pgm *dls, struct fixed_string *field, + int *end_blank) { - char *cp, *ep; - int len; - - cp = dfm_get_record (dls->handle, &len); - if (!cp) - return 0; + struct fixed_string line; + char *cp; + size_t column_start; - ep = cp + len; - - /* Skip leading whitespace and commas. */ - while ((isspace ((unsigned char) *cp) || *cp == ',') && cp < ep) - cp++; - if (cp >= ep) + if (dfm_eof (dls->reader)) return 0; + if (dls->delim_cnt == 0) + dfm_expand_tabs (dls->reader); + dfm_get_record (dls->reader, &line); - /* Three types of fields: quoted with ', quoted with ", unquoted. */ - if (*cp == '\'' || *cp == '"') + cp = ls_c_str (&line); + if (dls->delim_cnt == 0) { - int quote = *cp; - - *ret_cp = ++cp; - while (cp < ep && *cp != quote) - cp++; - *ret_len = cp - *ret_cp; - if (cp < ep) - cp++; + /* Skip leading whitespace. */ + while (cp < ls_end (&line) && isspace ((unsigned char) *cp)) + cp++; + if (cp >= ls_end (&line)) + return 0; + + /* Handle actual data, whether quoted or unquoted. */ + if (*cp == '\'' || *cp == '"') + { + int quote = *cp; + + field->string = ++cp; + while (cp < ls_end (&line) && *cp != quote) + cp++; + field->length = cp - field->string; + if (cp < ls_end (&line)) + cp++; + else + msg (SW, _("Quoted string missing terminating `%c'."), quote); + } else - msg (SW, _("Scope of string exceeds line.")); + { + field->string = cp; + while (cp < ls_end (&line) + && !isspace ((unsigned char) *cp) && *cp != ',') + cp++; + field->length = cp - field->string; + } + + /* Skip trailing whitespace and a single comma if present. */ + while (cp < ls_end (&line) && isspace ((unsigned char) *cp)) + cp++; + if (cp < ls_end (&line) && *cp == ',') + cp++; } - else + else { - *ret_cp = cp; - while (cp < ep && !isspace ((unsigned char) *cp) && *cp != ',') - cp++; - *ret_len = cp - *ret_cp; + if (cp >= ls_end (&line)) + { + int column = dfm_column_start (dls->reader); + /* A blank line or a line that ends in \t has a + trailing blank field. */ + if (column == 1 || (column > 1 && cp[-1] == '\t')) + { + if (*end_blank == 0) + { + *end_blank = 1; + field->string = ls_end (&line); + field->length = 0; + dfm_forward_record (dls->reader); + return column; + } + else + { + *end_blank = 0; + return 0; + } + } + else + return 0; + } + else + { + field->string = cp; + while (cp < ls_end (&line) + && memchr (dls->delims, *cp, dls->delim_cnt) == NULL) + cp++; + field->length = cp - field->string; + if (cp < ls_end (&line)) + cp++; + } } - - { - int beginning_column; - - dfm_set_record (dls->handle, *ret_cp); - beginning_column = dfm_get_cur_col (dls->handle) + 1; + + dfm_forward_columns (dls->reader, field->string - line.string); + column_start = dfm_column_start (dls->reader); - dfm_set_record (dls->handle, cp); + dfm_forward_columns (dls->reader, cp - field->string); - return beginning_column; - } + return column_start; } -typedef int data_list_read_func (const struct data_list_pgm *); +typedef int data_list_read_func (const struct data_list_pgm *, struct ccase *); static data_list_read_func read_from_data_list_fixed; static data_list_read_func read_from_data_list_free; static data_list_read_func read_from_data_list_list; @@ -990,92 +1049,94 @@ get_data_list_read_func (const struct data_list_pgm *dls) { case DLS_FIXED: return read_from_data_list_fixed; - break; case DLS_FREE: return read_from_data_list_free; - break; case DLS_LIST: return read_from_data_list_list; - break; default: assert (0); + abort (); } } -/* Reads a case from the data file and parses it according to - fixed-format syntax rules. Returns -1 on success, -2 at end - of file. */ +/* Reads a case from the data file into C, parsing it according + to fixed-format syntax rules in DLS. Returns -1 on success, + -2 at end of file. */ static int -read_from_data_list_fixed (const struct data_list_pgm *dls) +read_from_data_list_fixed (const struct data_list_pgm *dls, + struct ccase *c) { struct dls_var_spec *var_spec = dls->first; int i; - if (!dfm_get_record (dls->handle, NULL)) + if (dfm_eof (dls->reader)) return -2; - for (i = 1; i <= dls->nrec; i++) + for (i = 1; i <= dls->rec_cnt; i++) { - int len; - char *line = dfm_get_record (dls->handle, &len); + struct fixed_string line; - if (!line) + if (dfm_eof (dls->reader)) { /* Note that this can't occur on the first record. */ msg (SW, _("Partial case of %d of %d records discarded."), - i - 1, dls->nrec); + i - 1, dls->rec_cnt); return -2; } + dfm_expand_tabs (dls->reader); + dfm_get_record (dls->reader, &line); for (; var_spec && i == var_spec->rec; var_spec = var_spec->next) { struct data_in di; - data_in_finite_line (&di, line, len, var_spec->fc, var_spec->lc); - di.v = &temp_case->data[var_spec->fv]; - di.flags = 0; + data_in_finite_line (&di, ls_c_str (&line), ls_length (&line), + var_spec->fc, var_spec->lc); + di.v = case_data_rw (c, var_spec->fv); + di.flags = DI_IMPLIED_DECIMALS; di.f1 = var_spec->fc; di.format = var_spec->input; data_in (&di); } - dfm_fwd_record (dls->handle); + dfm_forward_record (dls->reader); } return -1; } -/* Reads a case from the data file and parses it according to - free-format syntax rules. Returns -1 on success, -2 at end of - file. */ +/* Reads a case from the data file into C, parsing it according + to free-format syntax rules in DLS. Returns -1 on success, + -2 at end of file. */ static int -read_from_data_list_free (const struct data_list_pgm *dls) +read_from_data_list_free (const struct data_list_pgm *dls, + struct ccase *c) { struct dls_var_spec *var_spec; - char *field; - int len; + int end_blank = 0; for (var_spec = dls->first; var_spec; var_spec = var_spec->next) { + struct fixed_string field; int column; /* Cut out a field and read in a new record if necessary. */ for (;;) { - column = cut_field (dls, &field, &len); + column = cut_field (dls, &field, &end_blank); if (column != 0) break; - if (dfm_get_record (dls->handle, NULL)) - dfm_fwd_record (dls->handle); - if (!dfm_get_record (dls->handle, NULL)) + if (!dfm_eof (dls->reader)) + dfm_forward_record (dls->reader); + if (dfm_eof (dls->reader)) { if (var_spec != dls->first) msg (SW, _("Partial case discarded. The first variable " - "missing was %s."), var_spec->name); + "missing was %s."), var_spec->name); return -2; } } @@ -1083,9 +1144,9 @@ read_from_data_list_free (const struct data_list_pgm *dls) { struct data_in di; - di.s = field; - di.e = field + len; - di.v = &temp_case->data[var_spec->fv]; + di.s = ls_c_str (&field); + di.e = ls_end (&field); + di.v = case_data_rw (c, var_spec->fv); di.flags = 0; di.f1 = column; di.format = var_spec->input; @@ -1099,34 +1160,36 @@ read_from_data_list_free (const struct data_list_pgm *dls) list-format syntax rules. Returns -1 on success, -2 at end of file. */ static int -read_from_data_list_list (const struct data_list_pgm *dls) +read_from_data_list_list (const struct data_list_pgm *dls, + struct ccase *c) { struct dls_var_spec *var_spec; - char *field; - int len; + int end_blank = 0; - if (!dfm_get_record (dls->handle, NULL)) + if (dfm_eof (dls->reader)) return -2; for (var_spec = dls->first; var_spec; var_spec = var_spec->next) { + struct fixed_string field; + int column; + /* Cut out a field and check for end-of-line. */ - int column = cut_field (dls, &field, &len); - + column = cut_field (dls, &field, &end_blank); if (column == 0) { - if (set_undefined) + if (get_undefined ()) msg (SW, _("Missing value(s) for all variables from %s onward. " - "These will be filled with the system-missing value " - "or blanks, as appropriate."), + "These will be filled with the system-missing value " + "or blanks, as appropriate."), var_spec->name); - for (; var_spec; var_spec = var_spec->next) + for (; var_spec; var_spec = var_spec->next) { int width = get_format_var_width (&var_spec->input); if (width == 0) - temp_case->data[var_spec->fv].f = SYSMIS; + case_data_rw (c, var_spec->fv)->f = SYSMIS; else - memset (temp_case->data[var_spec->fv].s, ' ', width); + memset (case_data_rw (c, var_spec->fv)->s, ' ', width); } break; } @@ -1134,9 +1197,9 @@ read_from_data_list_list (const struct data_list_pgm *dls) { struct data_in di; - di.s = field; - di.e = field + len; - di.v = &temp_case->data[var_spec->fv]; + di.s = ls_c_str (&field); + di.e = ls_end (&field); + di.v = case_data_rw (c, var_spec->fv); di.flags = 0; di.f1 = column; di.format = var_spec->input; @@ -1144,7 +1207,7 @@ read_from_data_list_list (const struct data_list_pgm *dls) } } - dfm_fwd_record (dls->handle); + dfm_forward_record (dls->reader); return -1; } @@ -1162,30 +1225,29 @@ destroy_dls_var_spec (struct dls_var_spec *spec) } } -/* Destroys DATA LIST transformation PGM. */ +/* Destroys DATA LIST transformation DLS. */ static void -destroy_dls (struct trns_header *pgm) +data_list_trns_free (void *dls_) { - struct data_list_pgm *dls = (struct data_list_pgm *) pgm; + struct data_list_pgm *dls = dls_; + free (dls->delims); destroy_dls_var_spec (dls->first); - fh_close_handle (dls->handle); - free (pgm); + dfm_close_reader (dls->reader); + free (dls); } -/* Note that since this is exclusively an input program, C is - guaranteed to be temp_case. */ +/* Handle DATA LIST transformation DLS, parsing data into C. */ static int -read_one_case (struct trns_header *t, struct ccase *c UNUSED, - int case_num UNUSED) +data_list_trns_proc (void *dls_, struct ccase *c, int case_num UNUSED) { - struct data_list_pgm *dls = (struct data_list_pgm *) t; + struct data_list_pgm *dls = dls_; data_list_read_func *read_func; int retval; - dfm_push (dls->handle); + dfm_push (dls->reader); read_func = get_data_list_read_func (dls); - retval = read_func (dls); + retval = read_func (dls, c); /* Handle end of file. */ if (retval == -2) @@ -1196,7 +1258,7 @@ read_one_case (struct trns_header *t, struct ccase *c UNUSED, { msg (SE, _("Attempt to read past end of file.")); err_failure (); - dfm_pop (dls->handle); + dfm_pop (dls->reader); return -2; } @@ -1211,14 +1273,14 @@ read_one_case (struct trns_header *t, struct ccase *c UNUSED, { if (retval == -2) { - temp_case->data[dls->end->fv].f = 1.0; + case_data_rw (c, dls->end->fv)->f = 1.0; retval = -1; } else - temp_case->data[dls->end->fv].f = 0.0; + case_data_rw (c, dls->end->fv)->f = 0.0; } - dfm_pop (dls->handle); + dfm_pop (dls->reader); return retval; } @@ -1227,25 +1289,24 @@ read_one_case (struct trns_header *t, struct ccase *c UNUSED, write_case(). */ static void data_list_source_read (struct case_source *source, + struct ccase *c, write_case_func *write_case, write_case_data wc_data) { struct data_list_pgm *dls = source->aux; data_list_read_func *read_func = get_data_list_read_func (dls); - dfm_push (dls->handle); - while (read_func (dls) != -2) + dfm_push (dls->reader); + while (read_func (dls, c) != -2) if (!write_case (wc_data)) break; - dfm_pop (dls->handle); - - fh_close_handle (dls->handle); + dfm_pop (dls->reader); } /* Destroys the source's internal data. */ static void data_list_source_destroy (struct case_source *source) { - destroy_dls (source->aux); + data_list_trns_free (source->aux); } const struct case_source_class data_list_source_class = @@ -1268,9 +1329,8 @@ struct rpd_num_or_var /* REPEATING DATA private data structure. */ struct repeating_data_trns { - struct trns_header h; struct dls_var_spec *first, *last; /* Variable parsing specifications. */ - struct file_handle *handle; /* Input file, never NULL. */ + struct dfm_reader *reader; /* Input file, never NULL. */ struct rpd_num_or_var starts_beg; /* STARTS=, before the dash. */ struct rpd_num_or_var starts_end; /* STARTS=, after the dash. */ @@ -1301,20 +1361,18 @@ int cmd_repeating_data (void) { struct repeating_data_trns *rpd; - - /* 0=print no table, 1=print table. (TABLE subcommand.) */ - int table = 1; - - /* Bits are set when a particular subcommand has been seen. */ - unsigned seen = 0; + int table = 1; /* Print table? */ + bool saw_starts = false; /* Saw STARTS subcommand? */ + bool saw_occurs = false; /* Saw OCCURS subcommand? */ + bool saw_length = false; /* Saw LENGTH subcommand? */ + bool saw_continued = false; /* Saw CONTINUED subcommand? */ + bool saw_id = false; /* Saw ID subcommand? */ + struct file_handle *const fh = default_handle; - lex_match_id ("REPEATING"); - lex_match_id ("DATA"); - assert (case_source_is_complex (vfm_source)); rpd = xmalloc (sizeof *rpd); - rpd->handle = default_handle; + rpd->reader = dfm_open_reader (default_handle); rpd->first = rpd->last = NULL; rpd->starts_beg.num = 0; rpd->starts_beg.var = NULL; @@ -1330,11 +1388,12 @@ cmd_repeating_data (void) { if (lex_match_id ("FILE")) { + struct file_handle *file; lex_match ('='); - rpd->handle = fh_parse_file_handle (); - if (!rpd->handle) + file = fh_parse (); + if (file == NULL) goto error; - if (rpd->handle != default_handle) + if (file != fh) { msg (SE, _("REPEATING DATA must use the same file as its " "corresponding DATA LIST or FILE TYPE.")); @@ -1344,13 +1403,13 @@ cmd_repeating_data (void) else if (lex_match_id ("STARTS")) { lex_match ('='); - if (seen & 1) + if (saw_starts) { msg (SE, _("%s subcommand given multiple times."),"STARTS"); goto error; } - seen |= 1; - + saw_starts = true; + if (!parse_num_or_var (&rpd->starts_beg, "STARTS beginning column")) goto error; @@ -1360,11 +1419,10 @@ cmd_repeating_data (void) if (!parse_num_or_var (&rpd->starts_end, "STARTS ending column")) goto error; } else { - /* Otherwise, rpd->starts_end is left uninitialized. - This is okay. We will initialize it later from the - record length of the file. We can't do this now - because we can't be sure that the user has specified - the file handle yet. */ + /* Otherwise, rpd->starts_end is uninitialized. We + will initialize it later from the record length + of the file. We can't do so now because the + file handle may not be specified yet. */ } if (rpd->starts_beg.num != 0 && rpd->starts_end.num != 0 @@ -1379,12 +1437,12 @@ cmd_repeating_data (void) else if (lex_match_id ("OCCURS")) { lex_match ('='); - if (seen & 2) + if (saw_occurs) { msg (SE, _("%s subcommand given multiple times."),"OCCURS"); goto error; } - seen |= 2; + saw_occurs = true; if (!parse_num_or_var (&rpd->occurs, "OCCURS")) goto error; @@ -1392,12 +1450,12 @@ cmd_repeating_data (void) else if (lex_match_id ("LENGTH")) { lex_match ('='); - if (seen & 4) + if (saw_length) { msg (SE, _("%s subcommand given multiple times."),"LENGTH"); goto error; } - seen |= 4; + saw_length = true; if (!parse_num_or_var (&rpd->length, "LENGTH")) goto error; @@ -1405,16 +1463,17 @@ cmd_repeating_data (void) else if (lex_match_id ("CONTINUED")) { lex_match ('='); - if (seen & 8) + if (saw_continued) { msg (SE, _("%s subcommand given multiple times."),"CONTINUED"); goto error; } - seen |= 8; + saw_continued = true; if (!lex_match ('/')) { - if (!parse_num_or_var (&rpd->cont_beg, "CONTINUED beginning column")) + if (!parse_num_or_var (&rpd->cont_beg, + "CONTINUED beginning column")) goto error; lex_negative_to_dash (); @@ -1438,12 +1497,12 @@ cmd_repeating_data (void) else if (lex_match_id ("ID")) { lex_match ('='); - if (seen & 16) + if (saw_id) { msg (SE, _("%s subcommand given multiple times."),"ID"); goto error; } - seen |= 16; + saw_id = true; if (!lex_force_int ()) goto error; @@ -1488,7 +1547,7 @@ cmd_repeating_data (void) goto error; find_variable_input_spec (rpd->id_var, &rpd->id_spec); - rpd->id_value = xmalloc (sizeof *rpd->id_value * rpd->id_var->nv); + rpd->id_value = xnmalloc (rpd->id_var->nv, sizeof *rpd->id_value); } else if (lex_match_id ("TABLE")) table = 1; @@ -1507,82 +1566,92 @@ cmd_repeating_data (void) } /* Comes here when DATA specification encountered. */ - if ((seen & (1 | 2)) != (1 | 2)) + if (!saw_starts || !saw_occurs) { - if ((seen & 1) == 0) + if (!saw_starts) msg (SE, _("Missing required specification STARTS.")); - if ((seen & 2) == 0) + if (!saw_occurs) msg (SE, _("Missing required specification OCCURS.")); goto error; } /* Enforce ID restriction. */ - if ((seen & 16) && !(seen & 8)) + if (saw_id && !saw_continued) { msg (SE, _("ID specified without CONTINUED.")); goto error; } - /* Calculate starts_end, cont_end if necessary. */ - if (rpd->starts_end.num == 0 && rpd->starts_end.var == NULL) - rpd->starts_end.num = fh_record_width (rpd->handle); - if (rpd->cont_end.num == 0 && rpd->starts_end.var == NULL) - rpd->cont_end.num = fh_record_width (rpd->handle); - - /* Calculate length if possible. */ - if ((seen & 4) == 0) + /* Calculate and check starts_end, cont_end if necessary. */ + if (rpd->starts_end.num == 0 && rpd->starts_end.var == NULL) { - struct dls_var_spec *iter; - - for (iter = rpd->first; iter; iter = iter->next) - { - if (iter->lc > rpd->length.num) - rpd->length.num = iter->lc; - } - assert (rpd->length.num != 0); + rpd->starts_end.num = fh != NULL ? handle_get_record_width (fh) : 80; + if (rpd->starts_beg.num != 0 + && rpd->starts_beg.num > rpd->starts_end.num) + { + msg (SE, _("STARTS beginning column (%d) exceeds " + "default STARTS ending column taken from file's " + "record width (%d)."), + rpd->starts_beg.num, rpd->starts_end.num); + goto error; + } + } + if (rpd->cont_end.num == 0 && rpd->cont_end.var == NULL) + { + rpd->cont_end.num = fh != NULL ? handle_get_record_width (fh) : 80; + if (rpd->cont_beg.num != 0 + && rpd->cont_beg.num > rpd->cont_end.num) + { + msg (SE, _("CONTINUED beginning column (%d) exceeds " + "default CONTINUED ending column taken from file's " + "record width (%d)."), + rpd->cont_beg.num, rpd->cont_end.num); + goto error; + } } lex_match ('='); if (!parse_repeating_data (&rpd->first, &rpd->last)) goto error; + /* Calculate length if necessary. */ + if (!saw_length) + { + struct dls_var_spec *iter; + + for (iter = rpd->first; iter; iter = iter->next) + if (iter->lc > rpd->length.num) + rpd->length.num = iter->lc; + assert (rpd->length.num != 0); + } + if (table) - dump_fixed_table (rpd->first, rpd->handle, rpd->last->rec); - - { - struct repeating_data_trns *new_trns; - - rpd->h.proc = repeating_data_trns_proc; - rpd->h.free = repeating_data_trns_free; + dump_fixed_table (rpd->first, fh, rpd->last->rec); - new_trns = xmalloc (sizeof *new_trns); - memcpy (new_trns, &rpd, sizeof *new_trns); - add_transformation ((struct trns_header *) new_trns); - } + add_transformation (repeating_data_trns_proc, repeating_data_trns_free, rpd); return lex_end_of_command (); error: - destroy_dls_var_spec (rpd->first); - free (rpd->id_value); + repeating_data_trns_free (rpd); return CMD_FAILURE; } -/* Because of the way that DATA LIST is structured, it's not trivial - to determine what input format is associated with a given variable. - This function finds the input format specification for variable V - and puts it in SPEC. */ +/* Finds the input format specification for variable V and puts + it in SPEC. Because of the way that DATA LIST is structured, + this is nontrivial. */ static void find_variable_input_spec (struct variable *v, struct fmt_spec *spec) { - int i; + size_t i; for (i = 0; i < n_trns; i++) { - struct data_list_pgm *pgm = (struct data_list_pgm *) t_trns[i]; + struct transformation *trns = &t_trns[i]; - if (pgm->h.proc == read_one_case) + if (trns->proc == data_list_trns_proc) { + struct data_list_pgm *pgm = trns->private; struct dls_var_spec *iter; for (iter = pgm->first; iter; iter = iter->next) @@ -1616,7 +1685,7 @@ parse_num_or_var (struct rpd_num_or_var *value, const char *message) return 0; } } - else if (lex_integer_p ()) + else if (lex_is_integer ()) { value->num = lex_integer (); @@ -1634,13 +1703,14 @@ parse_num_or_var (struct rpd_num_or_var *value, const char *message) return 1; } -/* Parses data specifications for repeating data groups. Taken from - parse_fixed(). Returns nonzero only if successful. */ +/* Parses data specifications for repeating data groups, adding + them to the linked list with head FIRST and tail LAST. + Returns nonzero only if successful. */ static int parse_repeating_data (struct dls_var_spec **first, struct dls_var_spec **last) { struct fixed_parsing_state fx; - int i; + size_t i; fx.recno = 0; fx.sc = 1; @@ -1650,7 +1720,7 @@ parse_repeating_data (struct dls_var_spec **first, struct dls_var_spec **last) if (!parse_DATA_LIST_vars (&fx.name, &fx.name_cnt, PV_NONE)) return 0; - if (token == T_NUM) + if (lex_is_number ()) { if (!fixed_parse_compatible (&fx, first, last)) goto fail; @@ -1663,7 +1733,7 @@ parse_repeating_data (struct dls_var_spec **first, struct dls_var_spec **last) else { msg (SE, _("SPSS-like or FORTRAN-like format " - "specification expected after variable names.")); + "specification expected after variable names.")); goto fail; } @@ -1671,11 +1741,6 @@ parse_repeating_data (struct dls_var_spec **first, struct dls_var_spec **last) free (fx.name[i]); free (fx.name); } - if (token != '.') - { - lex_error (_("expecting end of command")); - return 0; - } return 1; @@ -1693,21 +1758,13 @@ parse_repeating_data (struct dls_var_spec **first, struct dls_var_spec **last) static int realize_value (struct rpd_num_or_var *n, struct ccase *c) { - if (n->num > 0) - return n->num; - - assert (n->num == 0); if (n->var != NULL) { - double v = c->data[n->var->fv].f; - - if (v == SYSMIS || v <= INT_MIN || v >= INT_MAX) - return -1; - else - return v; + double v = case_num (c, n->var->fv); + return v != SYSMIS && v >= INT_MIN && v <= INT_MAX ? v : -1; } else - return 0; + return n->num; } /* Parameter record passed to rpd_parse_record(). */ @@ -1806,7 +1863,7 @@ rpd_parse_record (const struct rpd_parse_info *info) struct data_in di; data_in_finite_line (&di, info->line, info->len, fc, lc); - di.v = &info->c->data[var_spec->fv]; + di.v = case_data_rw (info->c, var_spec->fv); di.flags = 0; di.f1 = fc + 1; di.format = var_spec->input; @@ -1827,24 +1884,22 @@ rpd_parse_record (const struct rpd_parse_info *info) return occurrences; } -/* Analogous to read_one_case; reads one set of repetitions of the - elements in the REPEATING DATA structure. Returns -1 on success, - -2 on end of file or on failure. */ +/* Reads one set of repetitions of the elements in the REPEATING + DATA structure. Returns -1 on success, -2 on end of file or + on failure. */ int -repeating_data_trns_proc (struct trns_header *trns, struct ccase *c, - int case_num UNUSED) +repeating_data_trns_proc (void *trns_, struct ccase *c, int case_num UNUSED) { - struct repeating_data_trns *t = (struct repeating_data_trns *) trns; + struct repeating_data_trns *t = trns_; - char *line; /* Current record. */ - int len; /* Length of current record. */ + struct fixed_string line; /* Current record. */ int starts_beg; /* Starting column. */ int starts_end; /* Ending column. */ int occurs; /* Number of repetitions. */ int length; /* Length of each occurrence. */ - int cont_beg; /* Starting column for continuation lines. */ - int cont_end; /* Ending column for continuation lines. */ + int cont_beg; /* Starting column for continuation lines. */ + int cont_end; /* Ending column for continuation lines. */ int occurs_left; /* Number of occurrences remaining. */ @@ -1852,14 +1907,15 @@ repeating_data_trns_proc (struct trns_header *trns, struct ccase *c, int skip_first_record = 0; - dfm_push (t->handle); + dfm_push (t->reader); /* Read the current record. */ - dfm_bkwd_record (t->handle, 1); - line = dfm_get_record (t->handle, &len); - if (line == NULL) + dfm_reread_record (t->reader, 1); + dfm_expand_tabs (t->reader); + if (dfm_eof (t->reader)) return -2; - dfm_fwd_record (t->handle); + dfm_get_record (t->reader, &line); + dfm_forward_record (t->reader); /* Calculate occurs, length. */ occurs_left = occurs = realize_value (&t->occurs, c); @@ -1913,8 +1969,8 @@ repeating_data_trns_proc (struct trns_header *trns, struct ccase *c, { struct rpd_parse_info info; info.trns = t; - info.line = line; - info.len = len; + info.line = ls_c_str (&line); + info.len = ls_length (&line); info.beg = starts_beg; info.end = starts_end; info.ofs = length; @@ -1949,8 +2005,7 @@ repeating_data_trns_proc (struct trns_header *trns, struct ccase *c, assert (occurs_left >= 0); /* Read in another record. */ - line = dfm_get_record (t->handle, &len); - if (line == NULL) + if (dfm_eof (t->reader)) { tmsg (SE, RPD_ERR, _("Unexpected end of file with %d repetitions " @@ -1958,12 +2013,14 @@ repeating_data_trns_proc (struct trns_header *trns, struct ccase *c, occurs_left, occurs); return -2; } - dfm_fwd_record (t->handle); + dfm_expand_tabs (t->reader); + dfm_get_record (t->reader, &line); + dfm_forward_record (t->reader); /* Parse this record. */ info.trns = t; - info.line = line; - info.len = len; + info.line = ls_c_str (&line); + info.len = ls_length (&line); info.beg = cont_beg; info.end = cont_end; info.ofs = length; @@ -1976,35 +2033,35 @@ repeating_data_trns_proc (struct trns_header *trns, struct ccase *c, occurs_left -= code; } - dfm_pop (t->handle); + dfm_pop (t->reader); /* FIXME: This is a kluge until we've implemented multiplexing of transformations. */ return -3; } +/* Frees a REPEATING DATA transformation. */ void -repeating_data_trns_free (struct trns_header *rpd_) +repeating_data_trns_free (void *rpd_) { - struct repeating_data_trns *rpd = (struct repeating_data_trns *) rpd_; + struct repeating_data_trns *rpd = rpd_; destroy_dls_var_spec (rpd->first); - fh_close_handle (rpd->handle); + dfm_close_reader (rpd->reader); free (rpd->id_value); + free (rpd); } -/* This is a kluge. It is only here until I have more time - tocome up with something better. It lets - repeating_data_trns_proc() know how to write the cases that it - composes. */ +/* Lets repeating_data_trns_proc() know how to write the cases + that it composes. Not elegant. */ void -repeating_data_set_write_case (struct trns_header *trns, +repeating_data_set_write_case (struct transformation *trns_, write_case_func *write_case, write_case_data wc_data) { - struct repeating_data_trns *t = (struct repeating_data_trns *) trns; + struct repeating_data_trns *t = trns_->private; - assert (trns->proc == repeating_data_trns_proc); + assert (trns_->proc == repeating_data_trns_proc); t->write_case = write_case; t->wc_data = wc_data; }