From: Ben Pfaff Date: Mon, 20 Sep 2010 04:36:58 +0000 (-0700) Subject: data-in: Make data_in() parameters more uniform. X-Git-Tag: sav-api~2 X-Git-Url: https://pintos-os.org/cgi-bin/gitweb.cgi?p=pspp;a=commitdiff_plain;h=38993354cabb6fc37bb882be92f9a49e9aeb4c88 data-in: Make data_in() parameters more uniform. data_in() essentially takes an input encoding and an output encoding but the parameters were in inconsistent order, inconsistently named, and had inconsistent types. This commit changes all that to, I hope, be easier to understand. --- diff --git a/perl-module/PSPP.xs b/perl-module/PSPP.xs index 0e918a9fc2..bf90d986f3 100644 --- a/perl-module/PSPP.xs +++ b/perl-module/PSPP.xs @@ -655,9 +655,8 @@ CODE: { struct substring ss = ss_cstr (SvPV_nolen (sv)); if ( ! data_in (ss, LEGACY_NATIVE, ifmt->type, 0, 0, - sfi->dict, - case_data_rw (c, v), - var_get_width (v)) ) + case_data_rw (c, v), var_get_width (v), + dict_get_encoding (sfi->dict))) { RETVAL = 0; goto finish; diff --git a/src/data/data-in.c b/src/data/data-in.c index b8226b37f9..de19d67048 100644 --- a/src/data/data-in.c +++ b/src/data/data-in.c @@ -79,17 +79,16 @@ static bool trim_spaces_and_check_missing (struct data_in *); static int hexit_value (int c); /* Parses the characters in INPUT, which are encoded in the given - ENCODING, according to FORMAT. Stores the parsed - representation in OUTPUT, which the caller must have - initialized with the given WIDTH (0 for a numeric field, - otherwise the string width). - Iff FORMAT is a string format, then DICT must be a pointer - to the dictionary associated with OUTPUT. Otherwise, DICT - may be null. */ + INPUT_ENCODING, according to FORMAT. + + Stores the parsed representation in OUTPUT, which the caller must have + initialized with the given WIDTH (0 for a numeric field, otherwise the + string width). If FORMAT is FMT_A, then OUTPUT_ENCODING must specify the + correct encoding for OUTPUT (normally obtained via dict_get_encoding()). */ bool -data_in (struct substring input, const char *encoding, +data_in (struct substring input, const char *input_encoding, enum fmt_type format, int first_column, int last_column, - const struct dictionary *dict, union value *output, int width) + union value *output, int width, const char *output_encoding) { static data_in_parser_func *const handlers[FMT_NUMBER_OF_FORMATS] = { @@ -146,13 +145,14 @@ data_in (struct substring input, const char *encoding, else { /* Use the final output encoding. */ - dest_encoding = dict_get_encoding (dict); + dest_encoding = output_encoding; } } if (dest_encoding != NULL) { - i.input = recode_substring_pool (dest_encoding, encoding, input, NULL); + i.input = recode_substring_pool (dest_encoding, input_encoding, input, + NULL); s = i.input.string; } else @@ -209,7 +209,7 @@ number_has_implied_decimals (const char *s, enum fmt_type type) } static bool -has_implied_decimals (struct substring input, const char *encoding, +has_implied_decimals (struct substring input, const char *input_encoding, enum fmt_type format) { bool retval; @@ -237,7 +237,7 @@ has_implied_decimals (struct substring input, const char *encoding, return false; } - s = recode_string (LEGACY_NATIVE, encoding, + s = recode_string (LEGACY_NATIVE, input_encoding, ss_data (input), ss_length (input)); retval = (format == FMT_Z ? strchr (s, '.') == NULL @@ -255,11 +255,11 @@ has_implied_decimals (struct substring input, const char *encoding, If it is appropriate, this function modifies the numeric value in OUTPUT. */ void -data_in_imply_decimals (struct substring input, const char *encoding, +data_in_imply_decimals (struct substring input, const char *input_encoding, enum fmt_type format, int d, union value *output) { if (d > 0 && output->f != SYSMIS - && has_implied_decimals (input, encoding, format)) + && has_implied_decimals (input, input_encoding, format)) output->f /= pow (10., d); } diff --git a/src/data/data-in.h b/src/data/data-in.h index af62b3aecb..dbf2fa7caa 100644 --- a/src/data/data-in.h +++ b/src/data/data-in.h @@ -25,10 +25,9 @@ union value; struct dictionary; -bool data_in (struct substring input, const char *encoding, - enum fmt_type, int first_column, int last_column, - const struct dictionary *dict, - union value *output, int width); +bool data_in (struct substring input, const char *input_encoding, + enum fmt_type, int first_column, int last_column, + union value *output, int width, const char *output_encoding); void data_in_imply_decimals (struct substring input, const char *encoding, enum fmt_type format, int d, union value *output); diff --git a/src/language/data-io/data-parser.c b/src/language/data-io/data-parser.c index 0802bba8c5..800ae57b50 100644 --- a/src/language/data-io/data-parser.c +++ b/src/language/data-io/data-parser.c @@ -509,7 +509,8 @@ static bool parse_fixed (const struct data_parser *parser, struct dfm_reader *reader, struct ccase *c) { - const char *encoding = dfm_reader_get_legacy_encoding (reader); + const char *input_encoding = dfm_reader_get_legacy_encoding (reader); + const char *output_encoding = dict_get_encoding (parser->dict); struct field *f; int row; @@ -536,12 +537,12 @@ parse_fixed (const struct data_parser *parser, struct dfm_reader *reader, f->format.w); union value *value = case_data_rw_idx (c, f->case_idx); - data_in (s, encoding, f->format.type, + data_in (s, input_encoding, f->format.type, f->first_column, f->first_column + f->format.w, - parser->dict, value, fmt_var_width (&f->format)); + value, fmt_var_width (&f->format), output_encoding); - data_in_imply_decimals (s, encoding, f->format.type, f->format.d, - value); + data_in_imply_decimals (s, input_encoding, f->format.type, + f->format.d, value); } dfm_forward_record (reader); @@ -557,7 +558,8 @@ static bool parse_delimited_span (const struct data_parser *parser, struct dfm_reader *reader, struct ccase *c) { - const char *encoding = dfm_reader_get_legacy_encoding (reader); + const char *input_encoding = dfm_reader_get_legacy_encoding (reader); + const char *output_encoding = dict_get_encoding (parser->dict); struct string tmp = DS_EMPTY_INITIALIZER; struct field *f; @@ -582,10 +584,9 @@ parse_delimited_span (const struct data_parser *parser, } } - data_in (s, encoding, f->format.type, first_column, last_column, - parser->dict, + data_in (s, input_encoding, f->format.type, first_column, last_column, case_data_rw_idx (c, f->case_idx), - fmt_var_width (&f->format)); + fmt_var_width (&f->format), output_encoding); } ds_destroy (&tmp); return true; @@ -598,7 +599,8 @@ static bool parse_delimited_no_span (const struct data_parser *parser, struct dfm_reader *reader, struct ccase *c) { - const char *encoding = dfm_reader_get_legacy_encoding (reader); + const char *input_encoding = dfm_reader_get_legacy_encoding (reader); + const char *output_encoding = dict_get_encoding (parser->dict); struct string tmp = DS_EMPTY_INITIALIZER; struct substring s; struct field *f, *end; @@ -623,10 +625,9 @@ parse_delimited_no_span (const struct data_parser *parser, goto exit; } - data_in (s, encoding, f->format.type, first_column, last_column, - parser->dict, + data_in (s, input_encoding, f->format.type, first_column, last_column, case_data_rw_idx (c, f->case_idx), - fmt_var_width (&f->format)); + fmt_var_width (&f->format), output_encoding); } s = dfm_get_record (reader); diff --git a/src/language/expressions/operations.def b/src/language/expressions/operations.def index a945f7bf99..b7783257cd 100644 --- a/src/language/expressions/operations.def +++ b/src/language/expressions/operations.def @@ -581,7 +581,7 @@ string function RTRIM (string s, string c) function NUMBER (string s, ni_format f) { union value out; - data_in (ss_head (s, f->w), LEGACY_NATIVE, f->type, 0, 0, NULL, &out, 0); + data_in (ss_head (s, f->w), LEGACY_NATIVE, f->type, 0, 0, &out, 0, NULL); data_in_imply_decimals (s, LEGACY_NATIVE, f->type, f->d, &out); return out.f; } diff --git a/src/language/lexer/value-parser.c b/src/language/lexer/value-parser.c index c2020d3658..8cbbab4c82 100644 --- a/src/language/lexer/value-parser.c +++ b/src/language/lexer/value-parser.c @@ -105,7 +105,7 @@ parse_number (struct lexer *lexer, double *x, const enum fmt_type *format) union value v; assert (! (fmt_get_category (*format) & ( FMT_CAT_STRING ))); data_in (ds_ss (lex_tokstr (lexer)), LEGACY_NATIVE, *format, 0, 0, - NULL, &v, 0); + &v, 0, NULL); lex_get (lexer); *x = v.f; if (*x == SYSMIS) diff --git a/src/language/stats/flip.c b/src/language/stats/flip.c index 3474979aa2..7390b08c0a 100644 --- a/src/language/stats/flip.c +++ b/src/language/stats/flip.c @@ -397,6 +397,7 @@ static struct ccase * flip_casereader_read (struct casereader *reader, void *flip_) { struct flip_pgm *flip = flip_; + const char *encoding; struct ccase *c; size_t i; @@ -404,12 +405,10 @@ flip_casereader_read (struct casereader *reader, void *flip_) return false; c = case_create (casereader_get_proto (reader)); - data_in (ss_cstr (flip->old_names.names[flip->cases_read]), dict_get_encoding (flip->dict), - FMT_A, - 0, 0, - flip->dict, - case_data_rw_idx (c, 0), 8); - + encoding = dict_get_encoding (flip->dict); + data_in (ss_cstr (flip->old_names.names[flip->cases_read]), encoding, + FMT_A, 0, 0, case_data_rw_idx (c, 0), 8, encoding); + for (i = 0; i < flip->n_cases; i++) { double in; diff --git a/src/language/xforms/recode.c b/src/language/xforms/recode.c index 7f45865739..c9787b11bd 100644 --- a/src/language/xforms/recode.c +++ b/src/language/xforms/recode.c @@ -159,6 +159,7 @@ cmd_recode (struct lexer *lexer, struct dataset *ds) /* Create destination variables, if needed. This must be the final step; otherwise we'd have to delete destination variables on failure. */ + trns->dst_dict = dataset_dict (ds); if (trns->src_vars != trns->dst_vars) create_dst_vars (trns, dataset_dict (ds)); @@ -545,8 +546,6 @@ create_dst_vars (struct recode_trns *trns, struct dictionary *dict) { size_t i; - trns->dst_dict = dict; - for (i = 0; i < trns->var_cnt; i++) { const struct variable **var = &trns->dst_vars[i]; @@ -608,8 +607,9 @@ static const struct map_out * find_src_string (struct recode_trns *trns, const uint8_t *value, const struct variable *src_var) { - struct mapping *m; + const char *encoding = dict_get_encoding (trns->dst_dict); int width = var_get_width (src_var); + struct mapping *m; for (m = trns->mappings; m < trns->mappings + trns->map_cnt; m++) { @@ -632,8 +632,7 @@ find_src_string (struct recode_trns *trns, const uint8_t *value, msg_disable (); match = data_in (ss_buffer (CHAR_CAST_BUG (char *, value), width), - LEGACY_NATIVE, FMT_F, 0, 0, trns->dst_dict, - &uv, 0); + LEGACY_NATIVE, FMT_F, 0, 0, &uv, 0, encoding); msg_enable (); out->value.f = uv.f; break; diff --git a/src/ui/gui/helper.c b/src/ui/gui/helper.c index 70e3c27394..b8936e6c3b 100644 --- a/src/ui/gui/helper.c +++ b/src/ui/gui/helper.c @@ -98,7 +98,8 @@ text_to_value (const gchar *text, value_init (val, width); msg_disable (); - data_in (ss_cstr (text), UTF8, format->type, 0, 0, dict->dict, val, width); + data_in (ss_cstr (text), UTF8, format->type, 0, 0, val, width, + dict_get_encoding (dict->dict)); msg_enable (); return val; diff --git a/src/ui/gui/psppire-data-store.c b/src/ui/gui/psppire-data-store.c index fb9c81797f..7d26f5629b 100644 --- a/src/ui/gui/psppire-data-store.c +++ b/src/ui/gui/psppire-data-store.c @@ -959,7 +959,8 @@ psppire_data_store_data_in (PsppireDataStore *ds, casenumber casenum, gint idx, FALSE); value_init (&value, width); ok = (datasheet_get_value (ds->datasheet, casenum, idx, &value) - && data_in (input, UTF8, fmt->type, 0, 0, dict->dict, &value, width) + && data_in (input, UTF8, fmt->type, 0, 0, &value, width, + dict_get_encoding (dict->dict)) && datasheet_put_value (ds->datasheet, casenum, idx, &value)); value_destroy (&value, width); diff --git a/src/ui/gui/text-data-import-dialog.c b/src/ui/gui/text-data-import-dialog.c index 070c4f6b3c..c9227ff854 100644 --- a/src/ui/gui/text-data-import-dialog.c +++ b/src/ui/gui/text-data-import-dialog.c @@ -1777,8 +1777,8 @@ parse_field (struct import_assistant *ia, { msg_disable (); - if (!data_in (field, LEGACY_NATIVE, in->type, 0, 0, ia->formats.dict, - &val, var_get_width (var))) + if (!data_in (field, LEGACY_NATIVE, in->type, 0, 0, &val, + var_get_width (var), dict_get_encoding (ia->formats.dict))) { char fmt_string[FMT_STRING_LEN_MAX + 1]; fmt_to_string (in, fmt_string); diff --git a/src/ui/syntax-gen.c b/src/ui/syntax-gen.c index b204fbf6f3..aae8c972f9 100644 --- a/src/ui/syntax-gen.c +++ b/src/ui/syntax-gen.c @@ -155,7 +155,7 @@ syntax_gen_number (struct string *output, msg_disable (); /* FIXME: UTF8 encoded strings will fail here */ ok = data_in (ss_cstr (s), LEGACY_NATIVE, - format->type, 0, 0, NULL, &v_out, 0); + format->type, 0, 0, &v_out, 0, NULL); msg_enable (); if (ok && v_out.f == number) {