X-Git-Url: https://pintos-os.org/cgi-bin/gitweb.cgi?a=blobdiff_plain;f=src%2Flanguage%2Fdata-io%2Fdata-parser.c;h=0802bba8c5971117b2ebf9db50ca7ed35b9a0d02;hb=4e8efdc4acb80fc1a3735228d29fca0cf86fee6d;hp=87fd1b7919f6de9de5ec076dd1c1dc794840f47f;hpb=cc57a28ef6796ae9a64ef80d453f72126956d49d;p=pspp diff --git a/src/language/data-io/data-parser.c b/src/language/data-io/data-parser.c index 87fd1b7919..0802bba8c5 100644 --- a/src/language/data-io/data-parser.c +++ b/src/language/data-io/data-parser.c @@ -1,5 +1,5 @@ /* PSPP - a program for statistical analysis. - Copyright (C) 2007, 2009 Free Software Foundation, Inc. + Copyright (C) 2007, 2009, 2010 Free Software Foundation, Inc. This program is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -31,7 +31,7 @@ #include #include #include -#include +#include #include "xalloc.h" @@ -41,6 +41,7 @@ /* Data parser for textual data like that read by DATA LIST. */ struct data_parser { + const struct dictionary *dict; /*Dictionary of destination */ enum data_parser_type type; /* Type of data to parse. */ int skip_records; /* Records to skip before first real data. */ casenumber max_cases; /* Max number of cases to read. */ @@ -79,7 +80,7 @@ static void set_any_sep (struct data_parser *parser); /* Creates and returns a new data parser. */ struct data_parser * -data_parser_create (void) +data_parser_create (const struct dictionary *dict) { struct data_parser *parser = xmalloc (sizeof *parser); @@ -91,6 +92,7 @@ data_parser_create (void) parser->fields = NULL; parser->field_cnt = 0; parser->field_allocated = 0; + parser->dict = dict; parser->span = true; parser->empty_line_has_field = false; @@ -469,7 +471,7 @@ cut_field (const struct data_parser *parser, struct dfm_reader *reader, } *field = ds_ss (tmp); } - *last_column = dfm_column_start (reader); + *last_column = *first_column + (ss_length (line) - ss_length (p)); /* Skip trailing soft separator and a single hard separator if present. */ @@ -482,8 +484,10 @@ cut_field (const struct data_parser *parser, struct dfm_reader *reader, { /* Regular field. */ ss_get_chars (&p, ss_cspan (p, ds_ss (&parser->any_sep)), field); - *last_column = dfm_column_start (reader); - if (!ss_ltrim (&p, parser->soft_seps) || ss_is_empty (p)) + *last_column = *first_column + ss_length (*field); + + if (!ss_ltrim (&p, parser->soft_seps) || ss_is_empty (p) + || ss_find_char (parser->hard_seps, p.string[0]) != SIZE_MAX) { /* Advance past a trailing hard separator, regardless of whether one actually existed. If @@ -505,7 +509,7 @@ static bool parse_fixed (const struct data_parser *parser, struct dfm_reader *reader, struct ccase *c) { - enum legacy_encoding encoding = dfm_reader_get_legacy_encoding (reader); + const char *encoding = dfm_reader_get_legacy_encoding (reader); struct field *f; int row; @@ -527,12 +531,18 @@ parse_fixed (const struct data_parser *parser, struct dfm_reader *reader, line = dfm_get_record (reader); for (; f < &parser->fields[parser->field_cnt] && f->record == row; f++) - data_in (ss_substr (line, f->first_column - 1, - f->format.w), - encoding, f->format.type, f->format.d, - f->first_column, f->first_column + f->format.w, - case_data_rw_idx (c, f->case_idx), - fmt_var_width (&f->format)); + { + struct substring s = ss_substr (line, f->first_column - 1, + f->format.w); + union value *value = case_data_rw_idx (c, f->case_idx); + + data_in (s, encoding, f->format.type, + f->first_column, f->first_column + f->format.w, + parser->dict, value, fmt_var_width (&f->format)); + + data_in_imply_decimals (s, encoding, f->format.type, f->format.d, + value); + } dfm_forward_record (reader); } @@ -547,7 +557,7 @@ static bool parse_delimited_span (const struct data_parser *parser, struct dfm_reader *reader, struct ccase *c) { - enum legacy_encoding encoding = dfm_reader_get_legacy_encoding (reader); + const char *encoding = dfm_reader_get_legacy_encoding (reader); struct string tmp = DS_EMPTY_INITIALIZER; struct field *f; @@ -572,8 +582,8 @@ parse_delimited_span (const struct data_parser *parser, } } - data_in (s, encoding, f->format.type, 0, - first_column, last_column, + data_in (s, encoding, f->format.type, first_column, last_column, + parser->dict, case_data_rw_idx (c, f->case_idx), fmt_var_width (&f->format)); } @@ -588,32 +598,33 @@ static bool parse_delimited_no_span (const struct data_parser *parser, struct dfm_reader *reader, struct ccase *c) { - enum legacy_encoding encoding = dfm_reader_get_legacy_encoding (reader); + const char *encoding = dfm_reader_get_legacy_encoding (reader); struct string tmp = DS_EMPTY_INITIALIZER; struct substring s; - struct field *f; + struct field *f, *end; if (dfm_eof (reader)) return false; - for (f = parser->fields; f < &parser->fields[parser->field_cnt]; f++) + end = &parser->fields[parser->field_cnt]; + for (f = parser->fields; f < end; f++) { int first_column, last_column; if (!cut_field (parser, reader, &first_column, &last_column, &tmp, &s)) { - if (settings_get_undefined ()) + if (f < end - 1 && settings_get_undefined ()) msg (SW, _("Missing value(s) for all variables from %s onward. " "These will be filled with the system-missing value " "or blanks, as appropriate."), f->name); - for (; f < &parser->fields[parser->field_cnt]; f++) + for (; f < end; f++) value_set_missing (case_data_rw_idx (c, f->case_idx), fmt_var_width (&f->format)); goto exit; } - data_in (s, encoding, f->format.type, 0, - first_column, last_column, + data_in (s, encoding, f->format.type, first_column, last_column, + parser->dict, case_data_rw_idx (c, f->case_idx), fmt_var_width (&f->format)); } @@ -638,8 +649,7 @@ dump_fixed_table (const struct data_parser *parser, struct tab_table *t; size_t i; - t = tab_create (4, parser->field_cnt + 1, 0); - tab_columns (t, TAB_COL_DOWN, 1); + t = tab_create (4, parser->field_cnt + 1); tab_headers (t, 0, 0, 1, 0); tab_text (t, 0, 0, TAB_CENTER | TAT_TITLE, _("Variable")); tab_text (t, 1, 0, TAB_CENTER | TAT_TITLE, _("Record")); @@ -647,7 +657,6 @@ dump_fixed_table (const struct data_parser *parser, tab_text (t, 3, 0, TAB_CENTER | TAT_TITLE, _("Format")); tab_box (t, TAL_1, TAL_1, TAL_0, TAL_1, 0, 0, 3, parser->field_cnt); tab_hline (t, TAL_2, 0, 3, 1); - tab_dim (t, tab_natural_dimensions); for (i = 0; i < parser->field_cnt; i++) { @@ -656,9 +665,9 @@ dump_fixed_table (const struct data_parser *parser, int row = i + 1; tab_text (t, 0, row, TAB_LEFT, f->name); - tab_text (t, 1, row, TAT_PRINTF, "%d", f->record); - tab_text (t, 2, row, TAT_PRINTF, "%3d-%3d", - f->first_column, f->first_column + f->format.w - 1); + tab_text_format (t, 1, row, 0, "%d", f->record); + tab_text_format (t, 2, row, 0, "%3d-%3d", + f->first_column, f->first_column + f->format.w - 1); tab_text (t, 3, row, TAB_LEFT | TAB_FIX, fmt_to_string (&f->format, fmt_string)); } @@ -679,14 +688,12 @@ dump_delimited_table (const struct data_parser *parser, struct tab_table *t; size_t i; - t = tab_create (2, parser->field_cnt + 1, 0); - tab_columns (t, TAB_COL_DOWN, 1); + t = tab_create (2, parser->field_cnt + 1); tab_headers (t, 0, 0, 1, 0); tab_text (t, 0, 0, TAB_CENTER | TAT_TITLE, _("Variable")); tab_text (t, 1, 0, TAB_CENTER | TAT_TITLE, _("Format")); tab_box (t, TAL_1, TAL_1, TAL_0, TAL_1, 0, 0, 1, parser->field_cnt); tab_hline (t, TAL_2, 0, 1, 1); - tab_dim (t, tab_natural_dimensions); for (i = 0; i < parser->field_cnt; i++) { @@ -721,7 +728,7 @@ struct data_parser_casereader { struct data_parser *parser; /* Parser. */ struct dfm_reader *reader; /* Data file reader. */ - size_t value_cnt; /* Number of `union value's in case. */ + struct caseproto *proto; /* Format of cases. */ }; static const struct casereader_class data_parser_casereader_class; @@ -742,8 +749,8 @@ data_parser_make_active_file (struct data_parser *parser, struct dataset *ds, r = xmalloc (sizeof *r); r->parser = parser; r->reader = reader; - r->value_cnt = dict_get_next_value_idx (dict); - casereader = casereader_create_sequential (NULL, r->value_cnt, + r->proto = caseproto_ref (dict_get_proto (dict)); + casereader = casereader_create_sequential (NULL, r->proto, CASENUMBER_MAX, &data_parser_casereader_class, r); proc_set_active_file (ds, casereader, dict); @@ -753,7 +760,7 @@ static struct ccase * data_parser_casereader_read (struct casereader *reader UNUSED, void *r_) { struct data_parser_casereader *r = r_; - struct ccase *c = case_create (r->value_cnt); + struct ccase *c = case_create (r->proto); if (data_parser_parse (r->parser, r->reader, c)) return c; else @@ -771,6 +778,7 @@ data_parser_casereader_destroy (struct casereader *reader UNUSED, void *r_) casereader_force_error (reader); data_parser_destroy (r->parser); dfm_close_reader (r->reader); + caseproto_unref (r->proto); free (r); }