X-Git-Url: https://pintos-os.org/cgi-bin/gitweb.cgi?a=blobdiff_plain;f=src%2Flanguage%2Fdata-io%2Fdata-list.c;h=4fea7a407038364aca731376c6c86d11d9e66caf;hb=d0371553a98cd169353bf6d211e375e5ffc3a3bd;hp=1d55d8b2ae06e8b7e7bd3fde851c997a81e064db;hpb=0fa141762183890ebd139ccd9264f08db9011539;p=pspp-builds.git diff --git a/src/language/data-io/data-list.c b/src/language/data-io/data-list.c index 1d55d8b2..4fea7a40 100644 --- a/src/language/data-io/data-list.c +++ b/src/language/data-io/data-list.c @@ -1,33 +1,31 @@ -/* PSPP - computes sample statistics. +/* PSPP - a program for statistical analysis. Copyright (C) 1997-9, 2000, 2006 Free Software Foundation, Inc. - Written by Ben Pfaff . - This program is free software; you can redistribute it and/or - modify it under the terms of the GNU General Public License as - published by the Free Software Foundation; either version 2 of the - License, or (at your option) any later version. + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. - This program is distributed in the hope that it will be useful, but - WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - General Public License for more details. + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. You should have received a copy of the GNU General Public License - along with this program; if not, write to the Free Software - Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA - 02110-1301, USA. */ + along with this program. If not, see . 
*/ #include #include #include +#include #include #include -#include #include -#include #include +#include +#include #include #include #include @@ -42,7 +40,6 @@ #include #include #include -#include #include #include #include @@ -52,8 +49,8 @@ #include #include -#include "size_max.h" #include "xsize.h" +#include "xalloc.h" #include "gettext.h" #define _(msgid) gettext (msgid) @@ -76,7 +73,7 @@ struct dls_var_spec }; static struct dls_var_spec * -ll_to_dls_var_spec (struct ll *ll) +ll_to_dls_var_spec (struct ll *ll) { return ll_data (ll, struct dls_var_spec, ll); } @@ -100,13 +97,14 @@ struct data_list_pgm int record_cnt; /* Number of records. */ struct string delims; /* Field delimiters. */ int skip_records; /* Records to skip before first case. */ + size_t value_cnt; /* Number of `union value's in case. */ }; -static const struct case_source_class data_list_source_class; +static const struct casereader_class data_list_casereader_class; -static bool parse_fixed (struct lexer *, struct dictionary *dict, +static bool parse_fixed (struct lexer *, struct dictionary *dict, struct pool *tmp_pool, struct data_list_pgm *); -static bool parse_free (struct lexer *, struct dictionary *dict, +static bool parse_free (struct lexer *, struct dictionary *dict, struct pool *tmp_pool, struct data_list_pgm *); static void dump_fixed_table (const struct ll_list *, const struct file_handle *, int record_cnt); @@ -119,15 +117,14 @@ static trns_proc_func data_list_trns_proc; int cmd_data_list (struct lexer *lexer, struct dataset *ds) { - struct dictionary *dict = dataset_dict (ds); + struct dictionary *dict; struct data_list_pgm *dls; int table = -1; /* Print table if nonzero, -1=undecided. */ - struct file_handle *fh = fh_inline_file (); + struct file_handle *fh = NULL; struct pool *tmp_pool; bool ok; - if (!in_input_program ()) - discard_variables (ds); + dict = in_input_program () ? 
dataset_dict (ds) : dict_create (); dls = pool_create_container (struct data_list_pgm, pool); ll_init (&dls->specs); @@ -146,6 +143,7 @@ cmd_data_list (struct lexer *lexer, struct dataset *ds) if (lex_match_id (lexer, "FILE")) { lex_match (lexer, '='); + fh_unref (fh); fh = fh_parse (lexer, FH_REF_FILE | FH_REF_INLINE); if (fh == NULL) goto error; @@ -175,13 +173,13 @@ cmd_data_list (struct lexer *lexer, struct dataset *ds) msg (SE, _("The END subcommand may only be specified once.")); goto error; } - + lex_match (lexer, '='); if (!lex_force_id (lexer)) goto error; - dls->end = dict_lookup_var (dataset_dict (ds), lex_tokid (lexer)); - if (!dls->end) - dls->end = dict_create_var_assert (dataset_dict (ds), lex_tokid (lexer), 0); + dls->end = dict_lookup_var (dict, lex_tokid (lexer)); + if (!dls->end) + dls->end = dict_create_var_assert (dict, lex_tokid (lexer), 0); lex_get (lexer); } else if (lex_token (lexer) == T_ID) @@ -190,7 +188,7 @@ cmd_data_list (struct lexer *lexer, struct dataset *ds) table = 0; else if (lex_match_id (lexer, "TABLE")) table = 1; - else + else { int type; if (lex_match_id (lexer, "FIXED")) @@ -199,7 +197,7 @@ cmd_data_list (struct lexer *lexer, struct dataset *ds) type = DLS_FREE; else if (lex_match_id (lexer, "LIST")) type = DLS_LIST; - else + else { lex_error (lexer, NULL); goto error; @@ -214,7 +212,7 @@ cmd_data_list (struct lexer *lexer, struct dataset *ds) dls->type = type; if ((dls->type == DLS_FREE || dls->type == DLS_LIST) - && lex_match (lexer, '(')) + && lex_match (lexer, '(')) { while (!lex_match (lexer, ')')) { @@ -227,7 +225,7 @@ cmd_data_list (struct lexer *lexer, struct dataset *ds) delim = ds_first (lex_tokstr (lexer)); lex_get (lexer); } - else + else { lex_error (lexer, NULL); goto error; @@ -247,11 +245,19 @@ cmd_data_list (struct lexer *lexer, struct dataset *ds) } } + if (fh == NULL) + fh = fh_inline_file (); fh_set_default_handle (fh); if (dls->type == -1) dls->type = DLS_FIXED; + if (dls->type != DLS_FIXED && dls->end 
!= NULL) + { + msg (SE, _("The END keyword may be used only with DATA LIST FIXED.")); + goto error; + } + if (table == -1) table = dls->type != DLS_FREE; @@ -274,16 +280,27 @@ cmd_data_list (struct lexer *lexer, struct dataset *ds) if (dls->reader == NULL) goto error; + dls->value_cnt = dict_get_next_value_idx (dict); + if (in_input_program ()) add_transformation (ds, data_list_trns_proc, data_list_trns_free, dls); - else - proc_set_source (ds, create_case_source (&data_list_source_class, dls)); + else + { + struct casereader *reader; + reader = casereader_create_sequential (NULL, + dict_get_next_value_idx (dict), + -1, &data_list_casereader_class, + dls); + proc_set_active_file (ds, reader, dict); + } pool_destroy (tmp_pool); + fh_unref (fh); return CMD_SUCCESS; error: + fh_unref (fh); data_list_trns_free (dls); return CMD_CASCADING_FAILURE; } @@ -295,7 +312,7 @@ cmd_data_list (struct lexer *lexer, struct dataset *ds) needed once parsing is complete. Returns true only if successful. */ static bool -parse_fixed (struct lexer *lexer, struct dictionary *dict, +parse_fixed (struct lexer *lexer, struct dictionary *dict, struct pool *tmp_pool, struct data_list_pgm *dls) { int last_nonempty_record; @@ -311,7 +328,7 @@ parse_fixed (struct lexer *lexer, struct dictionary *dict, /* Parse everything. */ if (!parse_record_placement (lexer, &record, &column) - || !parse_DATA_LIST_vars_pool (lexer, tmp_pool, + || !parse_DATA_LIST_vars_pool (lexer, tmp_pool, &names, &name_cnt, PV_NONE) || !parse_var_placements (lexer, tmp_pool, name_cnt, true, &formats, &format_cnt)) @@ -326,7 +343,7 @@ parse_fixed (struct lexer *lexer, struct dictionary *dict, int width; struct variable *v; struct dls_var_spec *spec; - + name = names[name_idx++]; /* Create variable. */ @@ -336,8 +353,7 @@ parse_fixed (struct lexer *lexer, struct dictionary *dict, { /* Success. 
*/ struct fmt_spec output = fmt_for_output_from_input (f); - v->print = output; - v->write = output; + var_set_both_formats (v, &output); } else { @@ -345,7 +361,7 @@ parse_fixed (struct lexer *lexer, struct dictionary *dict, This can be acceptable if we're in INPUT PROGRAM, but only if the existing variable has the same width as the one we would have - created. */ + created. */ if (!in_input_program ()) { msg (SE, _("%s is a duplicate variable name."), name); @@ -353,14 +369,14 @@ parse_fixed (struct lexer *lexer, struct dictionary *dict, } v = dict_lookup_var_assert (dict, name); - if ((width != 0) != (v->width != 0)) + if ((width != 0) != (var_get_width (v) != 0)) { msg (SE, _("There is already a variable %s of a " "different type."), name); return false; } - if (width != 0 && width != v->width) + if (width != 0 && width != var_get_width (v)) { msg (SE, _("There is already a string variable %s of a " "different width."), name); @@ -371,17 +387,17 @@ parse_fixed (struct lexer *lexer, struct dictionary *dict, /* Create specifier for parsing the variable. */ spec = pool_alloc (dls->pool, sizeof *spec); spec->input = *f; - spec->fv = v->fv; + spec->fv = var_get_case_index (v); spec->record = record; spec->first_column = column; - strcpy (spec->name, v->name); + strcpy (spec->name, var_get_name (v)); ll_push_tail (&dls->specs, &spec->ll); column += f->w; } assert (name_idx == name_cnt); } - if (ll_is_empty (&dls->specs)) + if (ll_is_empty (&dls->specs)) { msg (SE, _("At least one variable must be specified.")); return false; @@ -394,7 +410,7 @@ parse_fixed (struct lexer *lexer, struct dictionary *dict, "should not exist according to RECORDS subcommand.")); return false; } - else if (!dls->record_cnt) + else if (!dls->record_cnt) dls->record_cnt = last_nonempty_record; return true; @@ -448,7 +464,7 @@ dump_fixed_table (const struct ll_list *specs, them to DLS. Uses TMP_POOL for data that is not needed once parsing is complete. Returns true only if successful. 
*/ static bool -parse_free (struct lexer *lexer, struct dictionary *dict, struct pool *tmp_pool, +parse_free (struct lexer *lexer, struct dictionary *dict, struct pool *tmp_pool, struct data_list_pgm *dls) { lex_get (lexer); @@ -459,7 +475,7 @@ parse_free (struct lexer *lexer, struct dictionary *dict, struct pool *tmp_pool, size_t name_cnt; size_t i; - if (!parse_DATA_LIST_vars_pool (lexer, tmp_pool, + if (!parse_DATA_LIST_vars_pool (lexer, tmp_pool, &name, &name_cnt, PV_NONE)) return 0; @@ -467,14 +483,14 @@ parse_free (struct lexer *lexer, struct dictionary *dict, struct pool *tmp_pool, { if (!parse_format_specifier (lexer, &input) || !fmt_check_input (&input) - || !lex_force_match (lexer, ')')) + || !lex_force_match (lexer, ')')) return NULL; /* As a special case, N format is treated as F format for free-field input. */ if (input.type == FMT_N) input.type = FMT_F; - + output = fmt_for_output_from_input (&input); } else @@ -495,12 +511,12 @@ parse_free (struct lexer *lexer, struct dictionary *dict, struct pool *tmp_pool, msg (SE, _("%s is a duplicate variable name."), name[i]); return 0; } - v->print = v->write = output; + var_set_both_formats (v, &output); spec = pool_alloc (dls->pool, sizeof *spec); spec->input = input; - spec->fv = v->fv; - strcpy (spec->name, v->name); + spec->fv = var_get_case_index (v); + strcpy (spec->name, var_get_name (v)); ll_push_tail (&dls->specs, &spec->ll); } } @@ -520,7 +536,7 @@ dump_free_table (const struct data_list_pgm *dls, int row; spec_cnt = ll_count (&dls->specs); - + t = tab_create (2, spec_cnt + 1, 0); tab_columns (t, TAB_COL_DOWN, 1); tab_headers (t, 0, 0, 1, 0); @@ -540,11 +556,11 @@ dump_free_table (const struct data_list_pgm *dls, } tab_title (t, _("Reading free-form data from %s."), fh_get_name (fh)); - + tab_submit (t); } -/* Input procedure. */ +/* Input procedure. */ /* Extracts a field from the current position in the current record. 
Fields can be unquoted or quoted with single- or @@ -552,7 +568,7 @@ dump_free_table (const struct data_list_pgm *dls, *FIELD is set to the field content. The caller must not modify or destroy this constant string. - + After parsing the field, sets the current position in the record to just past the field and any trailing delimiter. Returns 0 on failure or a 1-based column number indicating the @@ -568,15 +584,15 @@ cut_field (const struct data_list_pgm *dls, struct substring *field) dfm_expand_tabs (dls->reader); line = p = dfm_get_record (dls->reader); - if (ds_is_empty (&dls->delims)) + if (ds_is_empty (&dls->delims)) { bool missing_quote = false; - + /* Skip leading whitespace. */ ss_ltrim (&p, ss_cstr (CC_SPACES)); if (ss_is_empty (p)) return false; - + /* Handle actual data, whether quoted or unquoted. */ if (ss_match_char (&p, '\'')) missing_quote = !ss_get_until (&p, '\'', field); @@ -593,7 +609,7 @@ cut_field (const struct data_list_pgm *dls, struct substring *field) dfm_forward_columns (dls->reader, ss_length (line) - ss_length (p)); } - else + else { if (!ss_is_empty (p)) ss_get_chars (&p, ss_cspan (p, ds_ss (&dls->delims)), field); @@ -603,11 +619,11 @@ cut_field (const struct data_list_pgm *dls, struct substring *field) trailing blank field. */ *field = p; } - else + else return false; /* Advance past the field. - + Also advance past a trailing delimiter, regardless of whether one actually existed. If we "skip" a delimiter that was not actually there, then we will return @@ -627,7 +643,7 @@ static bool read_from_data_list_list (const struct data_list_pgm *, /* Reads a case from DLS into C. Returns true if successful, false at end of file or on I/O error. 
*/ static bool -read_from_data_list (const struct data_list_pgm *dls, struct ccase *c) +read_from_data_list (const struct data_list_pgm *dls, struct ccase *c) { bool retval; @@ -652,16 +668,17 @@ read_from_data_list (const struct data_list_pgm *dls, struct ccase *c) } /* Reads a case from the data file into C, parsing it according - to fixed-format syntax rules in DLS. + to fixed-format syntax rules in DLS. Returns true if successful, false at end of file or on I/O error. */ static bool read_from_data_list_fixed (const struct data_list_pgm *dls, struct ccase *c) { + enum legacy_encoding encoding = dfm_reader_get_legacy_encoding (dls->reader); struct dls_var_spec *spec; int row; - if (dfm_eof (dls->reader)) - return false; + if (dfm_eof (dls->reader)) + return false; spec = ll_to_dls_var_spec (ll_head (&dls->specs)); for (row = 1; row <= dls->record_cnt; row++) @@ -673,14 +690,21 @@ read_from_data_list_fixed (const struct data_list_pgm *dls, struct ccase *c) msg (SW, _("Partial case of %d of %d records discarded."), row - 1, dls->record_cnt); return false; - } + } dfm_expand_tabs (dls->reader); line = dfm_get_record (dls->reader); - ll_for_each_continue (spec, struct dls_var_spec, ll, &dls->specs) - data_in (ss_substr (line, spec->first_column - 1, spec->input.w), - spec->input.type, spec->input.d, spec->first_column, - case_data_rw (c, spec->fv), fmt_var_width (&spec->input)); + ll_for_each_continue (spec, struct dls_var_spec, ll, &dls->specs) + { + if (row < spec->record) + break; + + data_in (ss_substr (line, spec->first_column - 1, + spec->input.w), + encoding, spec->input.type, spec->input.d, + spec->first_column, case_data_rw_idx (c, spec->fv), + fmt_var_width (&spec->input)); + } dfm_forward_record (dls->reader); } @@ -689,21 +713,22 @@ read_from_data_list_fixed (const struct data_list_pgm *dls, struct ccase *c) } /* Reads a case from the data file into C, parsing it according - to free-format syntax rules in DLS. + to free-format syntax rules in DLS. 
Returns true if successful, false at end of file or on I/O error. */ static bool read_from_data_list_free (const struct data_list_pgm *dls, struct ccase *c) { + enum legacy_encoding encoding = dfm_reader_get_legacy_encoding (dls->reader); struct dls_var_spec *spec; ll_for_each (spec, struct dls_var_spec, ll, &dls->specs) { struct substring field; - + /* Cut out a field and read in a new record if necessary. */ while (!cut_field (dls, &field)) { - if (!dfm_eof (dls->reader)) + if (!dfm_eof (dls->reader)) dfm_forward_record (dls->reader); if (dfm_eof (dls->reader)) { @@ -713,20 +738,21 @@ read_from_data_list_free (const struct data_list_pgm *dls, struct ccase *c) return false; } } - - data_in (field, spec->input.type, 0, + + data_in (field, encoding, spec->input.type, 0, dfm_get_column (dls->reader, ss_data (field)), - case_data_rw (c, spec->fv), fmt_var_width (&spec->input)); + case_data_rw_idx (c, spec->fv), fmt_var_width (&spec->input)); } return true; } /* Reads a case from the data file and parses it according to - list-format syntax rules. + list-format syntax rules. Returns true if successful, false at end of file or on I/O error. 
*/ static bool read_from_data_list_list (const struct data_list_pgm *dls, struct ccase *c) { + enum legacy_encoding encoding = dfm_reader_get_legacy_encoding (dls->reader); struct dls_var_spec *spec; if (dfm_eof (dls->reader)) @@ -747,16 +773,16 @@ read_from_data_list_list (const struct data_list_pgm *dls, struct ccase *c) { int width = fmt_var_width (&spec->input); if (width == 0) - case_data_rw (c, spec->fv)->f = SYSMIS; + case_data_rw_idx (c, spec->fv)->f = SYSMIS; else - memset (case_data_rw (c, spec->fv)->s, ' ', width); + memset (case_data_rw_idx (c, spec->fv)->s, ' ', width); } break; } - - data_in (field, spec->input.type, 0, + + data_in (field, encoding, spec->input.type, 0, dfm_get_column (dls->reader, ss_data (field)), - case_data_rw (c, spec->fv), fmt_var_width (&spec->input)); + case_data_rw_idx (c, spec->fv), fmt_var_width (&spec->input)); } dfm_forward_record (dls->reader); @@ -783,7 +809,7 @@ data_list_trns_proc (void *dls_, struct ccase *c, casenumber case_num UNUSED) if (read_from_data_list (dls, c)) retval = TRNS_CONTINUE; - else if (dfm_reader_error (dls->reader) || dfm_eof (dls->reader) > 1) + else if (dfm_reader_error (dls->reader) || dfm_eof (dls->reader) > 1) { /* An I/O error, or encountering end of file for a second time, should be escalated into a more serious error. */ @@ -791,15 +817,15 @@ data_list_trns_proc (void *dls_, struct ccase *c, casenumber case_num UNUSED) } else retval = TRNS_END_FILE; - + /* If there was an END subcommand handle it. */ - if (dls->end != NULL) + if (dls->end != NULL) { - double *end = &case_data_rw (c, dls->end->fv)->f; - if (retval == TRNS_DROP_CASE) + double *end = &case_data_rw (c, dls->end)->f; + if (retval == TRNS_END_FILE) { *end = 1.0; - retval = TRNS_END_FILE; + retval = TRNS_CONTINUE; } else *end = 0.0; @@ -808,52 +834,47 @@ data_list_trns_proc (void *dls_, struct ccase *c, casenumber case_num UNUSED) return retval; } -/* Reads all the records from the data file and passes them to - write_case(). 
- Returns true if successful, false if an I/O error occurred. */ +/* Reads one case into C. + Returns true if successful, false at end of file or if an + I/O error occurred. */ static bool -data_list_source_read (struct case_source *source, - struct ccase *c, - write_case_func *write_case, write_case_data wc_data) +data_list_casereader_read (struct casereader *reader UNUSED, void *dls_, + struct ccase *c) { - struct data_list_pgm *dls = source->aux; + struct data_list_pgm *dls = dls_; + bool ok; /* Skip the requested number of records before reading the first case. */ - while (dls->skip_records > 0) + while (dls->skip_records > 0) { if (dfm_eof (dls->reader)) return false; dfm_forward_record (dls->reader); dls->skip_records--; } - - for (;;) - { - bool ok; - if (!read_from_data_list (dls, c)) - return !dfm_reader_error (dls->reader); - - dfm_push (dls->reader); - ok = write_case (wc_data); - dfm_pop (dls->reader); - if (!ok) - return false; - } + case_create (c, dls->value_cnt); + ok = read_from_data_list (dls, c); + if (!ok) + case_destroy (c); + return ok; } -/* Destroys the source's internal data. */ +/* Destroys the casereader. */ static void -data_list_source_destroy (struct case_source *source) +data_list_casereader_destroy (struct casereader *reader UNUSED, void *dls_) { - data_list_trns_free (source->aux); + struct data_list_pgm *dls = dls_; + if (dfm_reader_error (dls->reader)) + casereader_force_error (reader); + data_list_trns_free (dls); } -static const struct case_source_class data_list_source_class = +static const struct casereader_class data_list_casereader_class = { - "DATA LIST", + data_list_casereader_read, + data_list_casereader_destroy, + NULL, NULL, - data_list_source_read, - data_list_source_destroy, };