X-Git-Url: https://pintos-os.org/cgi-bin/gitweb.cgi?a=blobdiff_plain;f=src%2Flanguage%2Fdata-io%2Fdata-list.c;h=9f77ed8d9a135b7751ba408662779e70d1b7b70a;hb=c17ea35a73b7a690a54c5c6a213de19f0376e74f;hp=a1faffe46a0e6a025566ec04a1e43e0ecd43b01c;hpb=3a61659a8fc11c51ad5af02b20f5613dcde50382;p=pspp-builds.git diff --git a/src/language/data-io/data-list.c b/src/language/data-io/data-list.c index a1faffe4..9f77ed8d 100644 --- a/src/language/data-io/data-list.c +++ b/src/language/data-io/data-list.c @@ -1,21 +1,18 @@ -/* PSPP - computes sample statistics. +/* PSPP - a program for statistical analysis. Copyright (C) 1997-9, 2000, 2006 Free Software Foundation, Inc. - Written by Ben Pfaff . - This program is free software; you can redistribute it and/or - modify it under the terms of the GNU General Public License as - published by the Free Software Foundation; either version 2 of the - License, or (at your option) any later version. + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. - This program is distributed in the hope that it will be useful, but - WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - General Public License for more details. + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. You should have received a copy of the GNU General Public License - along with this program; if not, write to the Free Software - Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA - 02110-1301, USA. */ + along with this program. If not, see . */ #include @@ -24,10 +21,10 @@ #include #include -#include #include -#include #include +#include +#include #include #include #include @@ -76,7 +73,7 @@ struct dls_var_spec }; static struct dls_var_spec * -ll_to_dls_var_spec (struct ll *ll) +ll_to_dls_var_spec (struct ll *ll) { return ll_data (ll, struct dls_var_spec, ll); } @@ -99,13 +96,15 @@ struct data_list_pgm struct variable *end; /* Variable specified on END subcommand. */ int record_cnt; /* Number of records. */ struct string delims; /* Field delimiters. */ + int skip_records; /* Records to skip before first case. */ + size_t value_cnt; /* Number of `union value's in case. */ }; -static const struct case_source_class data_list_source_class; +static const struct casereader_class data_list_casereader_class; -static bool parse_fixed (struct dictionary *dict, +static bool parse_fixed (struct lexer *, struct dictionary *dict, struct pool *tmp_pool, struct data_list_pgm *); -static bool parse_free (struct dictionary *dict, +static bool parse_free (struct lexer *, struct dictionary *dict, struct pool *tmp_pool, struct data_list_pgm *); static void dump_fixed_table (const struct ll_list *, const struct file_handle *, int record_cnt); @@ -116,17 +115,16 @@ static trns_free_func data_list_trns_free; static trns_proc_func data_list_trns_proc; int -cmd_data_list (struct dataset *ds) +cmd_data_list (struct lexer *lexer, struct dataset *ds) { - struct dictionary *dict = dataset_dict (ds); + struct dictionary *dict; struct data_list_pgm *dls; int table = -1; /* Print table if nonzero, -1=undecided. */ struct file_handle *fh = fh_inline_file (); struct pool *tmp_pool; bool ok; - if (!in_input_program ()) - discard_variables (ds); + dict = in_input_program () ? dataset_dict (ds) : dict_create (); dls = pool_create_container (struct data_list_pgm, pool); ll_init (&dls->specs); @@ -134,64 +132,73 @@ cmd_data_list (struct dataset *ds) dls->type = -1; dls->end = NULL; dls->record_cnt = 0; + dls->skip_records = 0; ds_init_empty (&dls->delims); ds_register_pool (&dls->delims, dls->pool); tmp_pool = pool_create_subpool (dls->pool); - while (token != '/') + while (lex_token (lexer) != '/') { - if (lex_match_id ("FILE")) + if (lex_match_id (lexer, "FILE")) { - lex_match ('='); - fh = fh_parse (FH_REF_FILE | FH_REF_INLINE); + lex_match (lexer, '='); + fh = fh_parse (lexer, FH_REF_FILE | FH_REF_INLINE); if (fh == NULL) goto error; } - else if (lex_match_id ("RECORDS")) + else if (lex_match_id (lexer, "RECORDS")) { - lex_match ('='); - lex_match ('('); - if (!lex_force_int ()) + lex_match (lexer, '='); + lex_match (lexer, '('); + if (!lex_force_int (lexer)) goto error; - dls->record_cnt = lex_integer (); - lex_get (); - lex_match (')'); + dls->record_cnt = lex_integer (lexer); + lex_get (lexer); + lex_match (lexer, ')'); } - else if (lex_match_id ("END")) + else if (lex_match_id (lexer, "SKIP")) + { + lex_match (lexer, '='); + if (!lex_force_int (lexer)) + goto error; + dls->skip_records = lex_integer (lexer); + lex_get (lexer); + } + else if (lex_match_id (lexer, "END")) { if (dls->end) { msg (SE, _("The END subcommand may only be specified once.")); goto error; } - - lex_match ('='); - if (!lex_force_id ()) + + lex_match (lexer, '='); + if (!lex_force_id (lexer)) goto error; - dls->end = dict_lookup_var (dataset_dict (ds), tokid); - if (!dls->end) - dls->end = dict_create_var_assert (dataset_dict (ds), tokid, 0); - lex_get (); + dls->end = dict_lookup_var (dict, lex_tokid (lexer)); + if (!dls->end) + dls->end = dict_create_var_assert (dict, lex_tokid (lexer), 0); + lex_get (lexer); } - else if (token == T_ID) + else if (lex_token (lexer) == T_ID) { - if (lex_match_id ("NOTABLE")) + if (lex_match_id (lexer, "NOTABLE")) table = 0; - else if (lex_match_id ("TABLE")) + else if (lex_match_id (lexer, "TABLE")) table = 1; - else + else { int type; - if (lex_match_id ("FIXED")) + if (lex_match_id (lexer, "FIXED")) type = DLS_FIXED; - else if (lex_match_id ("FREE")) + else if (lex_match_id (lexer, "FREE")) type = DLS_FREE; - else if (lex_match_id ("LIST")) + else if (lex_match_id (lexer, "LIST")) type = DLS_LIST; - else + else { - lex_error (NULL); + lex_error (lexer, NULL); goto error; } @@ -204,35 +211,35 @@ cmd_data_list (struct dataset *ds) dls->type = type; if ((dls->type == DLS_FREE || dls->type == DLS_LIST) - && lex_match ('(')) + && lex_match (lexer, '(')) { - while (!lex_match (')')) + while (!lex_match (lexer, ')')) { int delim; - if (lex_match_id ("TAB")) + if (lex_match_id (lexer, "TAB")) delim = '\t'; - else if (token == T_STRING && ds_length (&tokstr) == 1) + else if (lex_token (lexer) == T_STRING && ds_length (lex_tokstr (lexer)) == 1) { - delim = ds_first (&tokstr); - lex_get (); + delim = ds_first (lex_tokstr (lexer)); + lex_get (lexer); } - else + else { - lex_error (NULL); + lex_error (lexer, NULL); goto error; } ds_put_char (&dls->delims, delim); - lex_match (','); + lex_match (lexer, ','); } } } } else { - lex_error (NULL); + lex_error (lexer, NULL); goto error; } } @@ -245,11 +252,11 @@ cmd_data_list (struct dataset *ds) if (table == -1) table = dls->type != DLS_FREE; - ok = (dls->type == DLS_FIXED ? parse_fixed : parse_free) (dict, tmp_pool, dls); + ok = (dls->type == DLS_FIXED ? parse_fixed : parse_free) (lexer, dict, tmp_pool, dls); if (!ok) goto error; - if (lex_end_of_command () != CMD_SUCCESS) + if (lex_end_of_command (lexer) != CMD_SUCCESS) goto error; if (table) @@ -260,14 +267,23 @@ cmd_data_list (struct dataset *ds) dump_free_table (dls, fh); } - dls->reader = dfm_open_reader (fh); + dls->reader = dfm_open_reader (fh, lexer); if (dls->reader == NULL) goto error; + dls->value_cnt = dict_get_next_value_idx (dict); + if (in_input_program ()) add_transformation (ds, data_list_trns_proc, data_list_trns_free, dls); - else - proc_set_source (ds, create_case_source (&data_list_source_class, dls)); + else + { + struct casereader *reader; + reader = casereader_create_sequential (NULL, + dict_get_next_value_idx (dict), + -1, &data_list_casereader_class, + dls); + proc_set_active_file (ds, reader, dict); + } pool_destroy (tmp_pool); @@ -285,14 +301,14 @@ cmd_data_list (struct dataset *ds) needed once parsing is complete. Returns true only if successful. */ static bool -parse_fixed (struct dictionary *dict, +parse_fixed (struct lexer *lexer, struct dictionary *dict, struct pool *tmp_pool, struct data_list_pgm *dls) { int last_nonempty_record; int record = 0; int column = 1; - while (token != '.') + while (lex_token (lexer) != '.') { char **names; size_t name_cnt, name_idx; @@ -300,9 +316,10 @@ parse_fixed (struct dictionary *dict, size_t format_cnt; /* Parse everything. */ - if (!parse_record_placement (&record, &column) - || !parse_DATA_LIST_vars_pool (tmp_pool, &names, &name_cnt, PV_NONE) - || !parse_var_placements (tmp_pool, name_cnt, true, + if (!parse_record_placement (lexer, &record, &column) + || !parse_DATA_LIST_vars_pool (lexer, tmp_pool, + &names, &name_cnt, PV_NONE) + || !parse_var_placements (lexer, tmp_pool, name_cnt, true, &formats, &format_cnt)) return false; @@ -315,7 +332,7 @@ parse_fixed (struct dictionary *dict, int width; struct variable *v; struct dls_var_spec *spec; - + name = names[name_idx++]; /* Create variable. */ @@ -325,8 +342,7 @@ parse_fixed (struct dictionary *dict, { /* Success. */ struct fmt_spec output = fmt_for_output_from_input (f); - v->print = output; - v->write = output; + var_set_both_formats (v, &output); } else { @@ -334,7 +350,7 @@ parse_fixed (struct dictionary *dict, This can be acceptable if we're in INPUT PROGRAM, but only if the existing variable has the same width as the one we would have - created. */ + created. */ if (!in_input_program ()) { msg (SE, _("%s is a duplicate variable name."), name); @@ -342,14 +358,14 @@ parse_fixed (struct dictionary *dict, } v = dict_lookup_var_assert (dict, name); - if ((width != 0) != (v->width != 0)) + if ((width != 0) != (var_get_width (v) != 0)) { msg (SE, _("There is already a variable %s of a " "different type."), name); return false; } - if (width != 0 && width != v->width) + if (width != 0 && width != var_get_width (v)) { msg (SE, _("There is already a string variable %s of a " "different width."), name); @@ -360,17 +376,17 @@ parse_fixed (struct dictionary *dict, /* Create specifier for parsing the variable. */ spec = pool_alloc (dls->pool, sizeof *spec); spec->input = *f; - spec->fv = v->fv; + spec->fv = var_get_case_index (v); spec->record = record; spec->first_column = column; - strcpy (spec->name, v->name); + strcpy (spec->name, var_get_name (v)); ll_push_tail (&dls->specs, &spec->ll); column += f->w; } assert (name_idx == name_cnt); } - if (ll_is_empty (&dls->specs)) + if (ll_is_empty (&dls->specs)) { msg (SE, _("At least one variable must be specified.")); return false; @@ -383,7 +399,7 @@ parse_fixed (struct dictionary *dict, "should not exist according to RECORDS subcommand.")); return false; } - else if (!dls->record_cnt) + else if (!dls->record_cnt) dls->record_cnt = last_nonempty_record; return true; @@ -437,30 +453,38 @@ dump_fixed_table (const struct ll_list *specs, them to DLS. Uses TMP_POOL for data that is not needed once parsing is complete. Returns true only if successful. */ static bool -parse_free (struct dictionary *dict, struct pool *tmp_pool, struct data_list_pgm *dls) +parse_free (struct lexer *lexer, struct dictionary *dict, struct pool *tmp_pool, + struct data_list_pgm *dls) { - lex_get (); - while (token != '.') + lex_get (lexer); + while (lex_token (lexer) != '.') { struct fmt_spec input, output; char **name; size_t name_cnt; size_t i; - if (!parse_DATA_LIST_vars_pool (tmp_pool, &name, &name_cnt, PV_NONE)) + if (!parse_DATA_LIST_vars_pool (lexer, tmp_pool, + &name, &name_cnt, PV_NONE)) return 0; - if (lex_match ('(')) + if (lex_match (lexer, '(')) { - if (!parse_format_specifier (&input) + if (!parse_format_specifier (lexer, &input) || !fmt_check_input (&input) - || !lex_force_match (')')) + || !lex_force_match (lexer, ')')) return NULL; + + /* As a special case, N format is treated as F format + for free-field input. */ + if (input.type == FMT_N) + input.type = FMT_F; + output = fmt_for_output_from_input (&input); } else { - lex_match ('*'); + lex_match (lexer, '*'); input = fmt_for_input (FMT_F, 8, 0); output = *get_format (); } @@ -476,12 +500,12 @@ parse_free (struct dictionary *dict, struct pool *tmp_pool, struct data_list_pgm msg (SE, _("%s is a duplicate variable name."), name[i]); return 0; } - v->print = v->write = output; + var_set_both_formats (v, &output); spec = pool_alloc (dls->pool, sizeof *spec); spec->input = input; - spec->fv = v->fv; - strcpy (spec->name, v->name); + spec->fv = var_get_case_index (v); + strcpy (spec->name, var_get_name (v)); ll_push_tail (&dls->specs, &spec->ll); } } @@ -501,7 +525,7 @@ dump_free_table (const struct data_list_pgm *dls, int row; spec_cnt = ll_count (&dls->specs); - + t = tab_create (2, spec_cnt + 1, 0); tab_columns (t, TAB_COL_DOWN, 1); tab_headers (t, 0, 0, 1, 0); @@ -521,11 +545,11 @@ dump_free_table (const struct data_list_pgm *dls, } tab_title (t, _("Reading free-form data from %s."), fh_get_name (fh)); - + tab_submit (t); } -/* Input procedure. */ +/* Input procedure. */ /* Extracts a field from the current position in the current record. Fields can be unquoted or quoted with single- or @@ -533,7 +557,7 @@ dump_free_table (const struct data_list_pgm *dls, *FIELD is set to the field content. The caller must not or destroy this constant string. - + After parsing the field, sets the current position in the record to just past the field and any trailing delimiter. Returns 0 on failure or a 1-based column number indicating the @@ -549,15 +573,15 @@ cut_field (const struct data_list_pgm *dls, struct substring *field) dfm_expand_tabs (dls->reader); line = p = dfm_get_record (dls->reader); - if (ds_is_empty (&dls->delims)) + if (ds_is_empty (&dls->delims)) { bool missing_quote = false; - + /* Skip leading whitespace. */ ss_ltrim (&p, ss_cstr (CC_SPACES)); if (ss_is_empty (p)) return false; - + /* Handle actual data, whether quoted or unquoted. */ if (ss_match_char (&p, '\'')) missing_quote = !ss_get_until (&p, '\'', field); @@ -574,7 +598,7 @@ cut_field (const struct data_list_pgm *dls, struct substring *field) dfm_forward_columns (dls->reader, ss_length (line) - ss_length (p)); } - else + else { if (!ss_is_empty (p)) ss_get_chars (&p, ss_cspan (p, ds_ss (&dls->delims)), field); @@ -584,11 +608,11 @@ cut_field (const struct data_list_pgm *dls, struct substring *field) trailing blank field. */ *field = p; } - else + else return false; /* Advance past the field. - + Also advance past a trailing delimiter, regardless of whether one actually existed. If we "skip" a delimiter that was not actually there, then we will return @@ -608,7 +632,7 @@ static bool read_from_data_list_list (const struct data_list_pgm *, /* Reads a case from DLS into C. Returns true if successful, false at end of file or on I/O error. */ static bool -read_from_data_list (const struct data_list_pgm *dls, struct ccase *c) +read_from_data_list (const struct data_list_pgm *dls, struct ccase *c) { bool retval; @@ -633,7 +657,7 @@ read_from_data_list (const struct data_list_pgm *dls, struct ccase *c) } /* Reads a case from the data file into C, parsing it according - to fixed-format syntax rules in DLS. + to fixed-format syntax rules in DLS. Returns true if successful, false at end of file or on I/O error. */ static bool read_from_data_list_fixed (const struct data_list_pgm *dls, struct ccase *c) @@ -641,8 +665,8 @@ read_from_data_list_fixed (const struct data_list_pgm *dls, struct ccase *c) struct dls_var_spec *spec; int row; - if (dfm_eof (dls->reader)) - return false; + if (dfm_eof (dls->reader)) + return false; spec = ll_to_dls_var_spec (ll_head (&dls->specs)); for (row = 1; row <= dls->record_cnt; row++) @@ -654,23 +678,19 @@ read_from_data_list_fixed (const struct data_list_pgm *dls, struct ccase *c) msg (SW, _("Partial case of %d of %d records discarded."), row - 1, dls->record_cnt); return false; - } + } dfm_expand_tabs (dls->reader); line = dfm_get_record (dls->reader); - ll_for_each_continue (spec, struct dls_var_spec, ll, &dls->specs) + ll_for_each_continue (spec, struct dls_var_spec, ll, &dls->specs) { - struct data_in di; + if (row < spec->record) + break; - data_in_finite_line (&di, ss_data (line), ss_length (line), - spec->first_column, - spec->first_column + spec->input.w - 1); - di.v = case_data_rw (c, spec->fv); - di.flags = DI_IMPLIED_DECIMALS; - di.f1 = spec->first_column; - di.format = spec->input; - - data_in (&di); + data_in (ss_substr (line, spec->first_column - 1, spec->input.w), + spec->input.type, spec->input.d, spec->first_column, + case_data_rw_idx (c, spec->fv), + fmt_var_width (&spec->input)); } dfm_forward_record (dls->reader); @@ -680,7 +700,7 @@ read_from_data_list_fixed (const struct data_list_pgm *dls, struct ccase *c) } /* Reads a case from the data file into C, parsing it according - to free-format syntax rules in DLS. + to free-format syntax rules in DLS. Returns true if successful, false at end of file or on I/O error. */ static bool read_from_data_list_free (const struct data_list_pgm *dls, struct ccase *c) @@ -690,12 +710,11 @@ read_from_data_list_free (const struct data_list_pgm *dls, struct ccase *c) ll_for_each (spec, struct dls_var_spec, ll, &dls->specs) { struct substring field; - struct data_in di; - + /* Cut out a field and read in a new record if necessary. */ while (!cut_field (dls, &field)) { - if (!dfm_eof (dls->reader)) + if (!dfm_eof (dls->reader)) dfm_forward_record (dls->reader); if (dfm_eof (dls->reader)) { @@ -705,20 +724,16 @@ read_from_data_list_free (const struct data_list_pgm *dls, struct ccase *c) return false; } } - - di.s = ss_data (field); - di.e = ss_end (field); - di.v = case_data_rw (c, spec->fv); - di.flags = 0; - di.f1 = dfm_get_column (dls->reader, ss_data (field)); - di.format = spec->input; - data_in (&di); + + data_in (field, spec->input.type, 0, + dfm_get_column (dls->reader, ss_data (field)), + case_data_rw_idx (c, spec->fv), fmt_var_width (&spec->input)); } return true; } /* Reads a case from the data file and parses it according to - list-format syntax rules. + list-format syntax rules. Returns true if successful, false at end of file or on I/O error. */ static bool read_from_data_list_list (const struct data_list_pgm *dls, struct ccase *c) @@ -731,7 +746,6 @@ read_from_data_list_list (const struct data_list_pgm *dls, struct ccase *c) ll_for_each (spec, struct dls_var_spec, ll, &dls->specs) { struct substring field; - struct data_in di; if (!cut_field (dls, &field)) { @@ -744,20 +758,16 @@ read_from_data_list_list (const struct data_list_pgm *dls, struct ccase *c) { int width = fmt_var_width (&spec->input); if (width == 0) - case_data_rw (c, spec->fv)->f = SYSMIS; + case_data_rw_idx (c, spec->fv)->f = SYSMIS; else - memset (case_data_rw (c, spec->fv)->s, ' ', width); + memset (case_data_rw_idx (c, spec->fv)->s, ' ', width); } break; } - - di.s = ss_data (field); - di.e = ss_end (field); - di.v = case_data_rw (c, spec->fv); - di.flags = 0; - di.f1 = dfm_get_column (dls->reader, ss_data (field)); - di.format = spec->input; - data_in (&di); + + data_in (field, spec->input.type, 0, + dfm_get_column (dls->reader, ss_data (field)), + case_data_rw_idx (c, spec->fv), fmt_var_width (&spec->input)); } dfm_forward_record (dls->reader); @@ -784,7 +794,7 @@ data_list_trns_proc (void *dls_, struct ccase *c, casenumber case_num UNUSED) if (read_from_data_list (dls, c)) retval = TRNS_CONTINUE; - else if (dfm_reader_error (dls->reader) || dfm_eof (dls->reader) > 1) + else if (dfm_reader_error (dls->reader) || dfm_eof (dls->reader) > 1) { /* An I/O error, or encountering end of file for a second time, should be escalated into a more serious error. */ @@ -792,11 +802,11 @@ data_list_trns_proc (void *dls_, struct ccase *c, casenumber case_num UNUSED) } else retval = TRNS_END_FILE; - + /* If there was an END subcommand handle it. */ - if (dls->end != NULL) + if (dls->end != NULL) { - double *end = &case_data_rw (c, dls->end->fv)->f; + double *end = &case_data_rw (c, dls->end)->f; if (retval == TRNS_DROP_CASE) { *end = 1.0; @@ -809,42 +819,47 @@ data_list_trns_proc (void *dls_, struct ccase *c, casenumber case_num UNUSED) return retval; } -/* Reads all the records from the data file and passes them to - write_case(). - Returns true if successful, false if an I/O error occurred. */ +/* Reads one case into OUTPUT_CASE. + Returns true if successful, false at end of file or if an + I/O error occurred. */ static bool -data_list_source_read (struct case_source *source, - struct ccase *c, - write_case_func *write_case, write_case_data wc_data) +data_list_casereader_read (struct casereader *reader UNUSED, void *dls_, + struct ccase *c) { - struct data_list_pgm *dls = source->aux; + struct data_list_pgm *dls = dls_; + bool ok; - for (;;) + /* Skip the requested number of records before reading the + first case. */ + while (dls->skip_records > 0) { - bool ok; - - if (!read_from_data_list (dls, c)) - return !dfm_reader_error (dls->reader); - - dfm_push (dls->reader); - ok = write_case (wc_data); - dfm_pop (dls->reader); - if (!ok) + if (dfm_eof (dls->reader)) return false; + dfm_forward_record (dls->reader); + dls->skip_records--; } + + case_create (c, dls->value_cnt); + ok = read_from_data_list (dls, c); + if (!ok) + case_destroy (c); + return ok; } -/* Destroys the source's internal data. */ +/* Destroys the casereader. */ static void -data_list_source_destroy (struct case_source *source) +data_list_casereader_destroy (struct casereader *reader UNUSED, void *dls_) { - data_list_trns_free (source->aux); + struct data_list_pgm *dls = dls_; + if (dfm_reader_error (dls->reader)) + casereader_force_error (reader); + data_list_trns_free (dls); } -static const struct case_source_class data_list_source_class = +static const struct casereader_class data_list_casereader_class = { - "DATA LIST", + data_list_casereader_read, + data_list_casereader_destroy, + NULL, NULL, - data_list_source_read, - data_list_source_destroy, };