From a5097a183f00ab2d2dc538ba7094a4696e2fea04 Mon Sep 17 00:00:00 2001 From: John Darrington Date: Sun, 12 Jul 2009 22:13:44 +0800 Subject: [PATCH] Added a dict parameter to data_in and dealt with the consequences. The data_in function now takes a pointer to a struct dictionary, which must be the dictionary with which the output value is associated. Data_in now ensures that the data of string values is converted to the dictionary's encoding if necessary. --- perl-module/PSPP.xs | 1 + src/data/data-in.c | 74 +++++++++++++------------ src/data/data-in.h | 2 + src/language/data-io/data-list.c | 2 +- src/language/data-io/data-parser.c | 7 ++- src/language/data-io/data-parser.h | 2 +- src/language/data-io/get-data.c | 6 +- src/language/expressions/operations.def | 2 +- src/language/lexer/value-parser.c | 3 +- src/language/xforms/recode.c | 7 ++- src/ui/gui/find-dialog.c | 1 + src/ui/gui/helper.c | 2 + src/ui/gui/helper.h | 1 + src/ui/gui/missing-val-dialog.c | 8 ++- src/ui/gui/psppire-data-store.c | 3 +- src/ui/gui/text-data-import-dialog.c | 2 + src/ui/gui/val-labs-dialog.c | 4 ++ src/ui/syntax-gen.c | 3 +- 18 files changed, 82 insertions(+), 48 deletions(-) diff --git a/perl-module/PSPP.xs b/perl-module/PSPP.xs index 94ca9b01..b3ac4cdc 100644 --- a/perl-module/PSPP.xs +++ b/perl-module/PSPP.xs @@ -614,6 +614,7 @@ CODE: { struct substring ss = ss_cstr (SvPV_nolen (sv)); if ( ! data_in (ss, LEGACY_NATIVE, ifmt->type, 0, 0, 0, + sfi->dict, case_data_rw (c, v), var_get_width (v)) ) { diff --git a/src/data/data-in.c b/src/data/data-in.c index 7e7d087d..33e369f9 100644 --- a/src/data/data-in.c +++ b/src/data/data-in.c @@ -34,6 +34,7 @@ #include "settings.h" #include "value.h" #include "format.h" +#include "dictionary.h" #include #include @@ -54,7 +55,7 @@ /* Information about parsing one data field. */ struct data_in { - const char *encoding; /* Encoding of source. */ + const char *src_enc; /* Encoding of source. */ struct substring input; /* Source. */ enum fmt_type format; /* Input format. */ int implied_decimals; /* Number of implied decimal places. */ @@ -89,6 +90,9 @@ static int hexit_value (int c); representation in OUTPUT, which the caller must have initialized with the given WIDTH (0 for a numeric field, otherwise the string width). + Iff FORMAT is a string format, then DICT must be a pointer + to the dictionary associated with OUTPUT. Otherwise, DICT + may be null. If no decimal point is included in a numeric format, then IMPLIED_DECIMALS decimal places are implied. Specify 0 if no @@ -103,7 +107,9 @@ static int hexit_value (int c); bool data_in (struct substring input, const char *encoding, enum fmt_type format, int implied_decimals, - int first_column, int last_column, union value *output, int width) + int first_column, int last_column, + const struct dictionary *dict, + union value *output, int width) { static data_in_parser_func *const handlers[FMT_NUMBER_OF_FORMATS] = { @@ -112,28 +118,11 @@ data_in (struct substring input, const char *encoding, }; struct data_in i; - void *copy = NULL; + bool ok; assert ((width != 0) == fmt_is_string (format)); - if (0 == strcmp (encoding, LEGACY_NATIVE) - || fmt_get_category (format) & (FMT_CAT_BINARY | FMT_CAT_STRING)) - { - i.input = input; - i.encoding = encoding; - } - else - { - char *s; - ss_alloc_uninit (&i.input, ss_length (input)); - - s = recode_string (LEGACY_NATIVE, encoding, ss_data (input), ss_length (input)); - memcpy (ss_data (i.input), s, ss_length (input)); - free (s); - i.encoding = LEGACY_NATIVE; - copy = ss_data (i.input); - } i.format = format; i.implied_decimals = implied_decimals; @@ -142,21 +131,39 @@ data_in (struct substring input, const char *encoding, i.first_column = first_column; i.last_column = last_column; + i.src_enc = encoding; - if (!ss_is_empty (i.input)) + if (ss_is_empty (input)) { - ok = handlers[i.format] (&i); - if (!ok) - default_result (&i); + default_result (&i); + return true; + } + + if (fmt_get_category (format) & ( FMT_CAT_BINARY | FMT_CAT_HEXADECIMAL | FMT_CAT_LEGACY)) + { + i.input = input; } else { - default_result (&i); - ok = true; + const char *dest_encoding; + char *s = NULL; + if ( dict == NULL) + { + assert (0 == (fmt_get_category (format) & (FMT_CAT_BINARY | FMT_CAT_STRING))); + dest_encoding = LEGACY_NATIVE; + } + else + dest_encoding = dict_get_encoding (dict); + + s = recode_string (dest_encoding, i.src_enc, ss_data (input), ss_length (input)); + ss_alloc_uninit (&i.input, strlen (s)); + memcpy (ss_data (i.input), s, ss_length (input)); + free (s); } - if (copy) - free (copy); + ok = handlers[i.format] (&i); + if (!ok) + default_result (&i); return ok; } @@ -617,9 +624,8 @@ parse_A (struct data_in *i) const char *src = ss_data (i->input); size_t src_size = ss_length (i->input); - char *s = recode_string (LEGACY_NATIVE, i->encoding, src, MIN (src_size, dst_size)); - memcpy (dst, s, dst_size); - free (s); + memcpy (dst, src, MIN (src_size, dst_size)); + if (dst_size > src_size) memset (&dst[src_size], ' ', dst_size - src_size); @@ -645,10 +651,10 @@ parse_AHEX (struct data_in *i) return false; } - if (0 != strcmp (i->encoding, LEGACY_NATIVE)) + if (0 != strcmp (i->src_enc, LEGACY_NATIVE)) { - hi = legacy_to_native (i->encoding, hi); - lo = legacy_to_native (i->encoding, lo); + hi = legacy_to_native (i->src_enc, hi); + lo = legacy_to_native (i->src_enc, lo); } if (!c_isxdigit (hi) || !c_isxdigit (lo)) { diff --git a/src/data/data-in.h b/src/data/data-in.h index 5256bb91..3ebd5933 100644 --- a/src/data/data-in.h +++ b/src/data/data-in.h @@ -26,9 +26,11 @@ enum fmt_type; union value; +struct dictionary; bool data_in (struct substring input, const char *encoding, enum fmt_type, int implied_decimals, int first_column, int last_column, + const struct dictionary *dict, union value *output, int width); #endif /* data/data-in.h */ diff --git a/src/language/data-io/data-list.c b/src/language/data-io/data-list.c index de857488..d43af347 100644 --- a/src/language/data-io/data-list.c +++ b/src/language/data-io/data-list.c @@ -86,7 +86,7 @@ cmd_data_list (struct lexer *lexer, struct dataset *ds) bool ok; dict = in_input_program () ? dataset_dict (ds) : dict_create (); - parser = data_parser_create (); + parser = data_parser_create (dict); reader = NULL; table = -1; /* Print table if nonzero, -1=undecided. */ diff --git a/src/language/data-io/data-parser.c b/src/language/data-io/data-parser.c index eab32868..8f189b1b 100644 --- a/src/language/data-io/data-parser.c +++ b/src/language/data-io/data-parser.c @@ -41,6 +41,7 @@ /* Data parser for textual data like that read by DATA LIST. */ struct data_parser { + const struct dictionary *dict; /*Dictionary of destination */ enum data_parser_type type; /* Type of data to parse. */ int skip_records; /* Records to skip before first real data. */ casenumber max_cases; /* Max number of cases to read. */ @@ -79,7 +80,7 @@ static void set_any_sep (struct data_parser *parser); /* Creates and returns a new data parser. */ struct data_parser * -data_parser_create (void) +data_parser_create (const struct dictionary *dict) { struct data_parser *parser = xmalloc (sizeof *parser); @@ -91,6 +92,7 @@ data_parser_create (void) parser->fields = NULL; parser->field_cnt = 0; parser->field_allocated = 0; + parser->dict = dict; parser->span = true; parser->empty_line_has_field = false; @@ -531,6 +533,7 @@ parse_fixed (const struct data_parser *parser, struct dfm_reader *reader, f->format.w), encoding, f->format.type, f->format.d, f->first_column, f->first_column + f->format.w, + parser->dict, case_data_rw_idx (c, f->case_idx), fmt_var_width (&f->format)); @@ -574,6 +577,7 @@ parse_delimited_span (const struct data_parser *parser, data_in (s, encoding, f->format.type, 0, first_column, last_column, + parser->dict, case_data_rw_idx (c, f->case_idx), fmt_var_width (&f->format)); } @@ -614,6 +618,7 @@ parse_delimited_no_span (const struct data_parser *parser, data_in (s, encoding, f->format.type, 0, first_column, last_column, + parser->dict, case_data_rw_idx (c, f->case_idx), fmt_var_width (&f->format)); } diff --git a/src/language/data-io/data-parser.h b/src/language/data-io/data-parser.h index b250e91b..5a53a2f6 100644 --- a/src/language/data-io/data-parser.h +++ b/src/language/data-io/data-parser.h @@ -38,7 +38,7 @@ enum data_parser_type }; /* Creating and configuring any parser. */ -struct data_parser *data_parser_create (void); +struct data_parser *data_parser_create (const struct dictionary *dict); void data_parser_destroy (struct data_parser *); enum data_parser_type data_parser_get_type (const struct data_parser *); diff --git a/src/language/data-io/get-data.c b/src/language/data-io/get-data.c index e4ab76a9..32202bab 100644 --- a/src/language/data-io/get-data.c +++ b/src/language/data-io/get-data.c @@ -271,7 +271,7 @@ static int parse_get_txt (struct lexer *lexer, struct dataset *ds) { struct data_parser *parser = NULL; - struct dictionary *dict = NULL; + struct dictionary *dict = dict_create (); struct file_handle *fh = NULL; struct dfm_reader *reader = NULL; @@ -288,7 +288,7 @@ parse_get_txt (struct lexer *lexer, struct dataset *ds) if (fh == NULL) goto error; - parser = data_parser_create (); + parser = data_parser_create (dict); has_type = false; data_parser_set_type (parser, DP_DELIMITED); data_parser_set_span (parser, false); @@ -465,7 +465,7 @@ parse_get_txt (struct lexer *lexer, struct dataset *ds) } lex_match (lexer, '='); - dict = dict_create (); + record = 1; type = data_parser_get_type (parser); do diff --git a/src/language/expressions/operations.def b/src/language/expressions/operations.def index 52d4226d..d2838672 100644 --- a/src/language/expressions/operations.def +++ b/src/language/expressions/operations.def @@ -573,7 +573,7 @@ string function RTRIM (string s, string c) function NUMBER (string s, ni_format f) { union value out; - data_in (ss_head (s, f->w), LEGACY_NATIVE, f->type, f->d, 0, 0, &out, 0); + data_in (ss_head (s, f->w), LEGACY_NATIVE, f->type, f->d, 0, 0, NULL, &out, 0); return out.f; } diff --git a/src/language/lexer/value-parser.c b/src/language/lexer/value-parser.c index 2cf9cf2b..c780d86f 100644 --- a/src/language/lexer/value-parser.c +++ b/src/language/lexer/value-parser.c @@ -99,8 +99,9 @@ parse_number (struct lexer *lexer, double *x, const enum fmt_type *format) else if (lex_token (lexer) == T_STRING && format != NULL) { union value v; + assert (! (fmt_get_category (*format) & ( FMT_CAT_STRING ))); data_in (ds_ss (lex_tokstr (lexer)), LEGACY_NATIVE, - *format, 0, 0, 0, &v, 0); + *format, 0, 0, 0, NULL, &v, 0); lex_get (lexer); *x = v.f; if (*x == SYSMIS) diff --git a/src/language/xforms/recode.c b/src/language/xforms/recode.c index e8a382a3..62b03ba0 100644 --- a/src/language/xforms/recode.c +++ b/src/language/xforms/recode.c @@ -83,6 +83,8 @@ struct recode_trns { struct pool *pool; + + /* Variable types, for convenience. */ enum val_type src_type; /* src_vars[*] type. */ enum val_type dst_type; /* dst_vars[*] type. */ @@ -90,6 +92,7 @@ struct recode_trns /* Variables. */ const struct variable **src_vars; /* Source variables. */ const struct variable **dst_vars; /* Destination variables. */ + const struct dictionary *dst_dict; /* Dictionary of dst_vars */ char **dst_names; /* Name of dest variables, if they're new. */ size_t var_cnt; /* Number of variables. */ @@ -540,6 +543,8 @@ create_dst_vars (struct recode_trns *trns, struct dictionary *dict) { size_t i; + trns->dst_dict = dict; + for (i = 0; i < trns->var_cnt; i++) { const struct variable **var = &trns->dst_vars[i]; @@ -625,7 +630,7 @@ find_src_string (struct recode_trns *trns, const uint8_t *value, msg_disable (); match = data_in (ss_buffer (value, width), LEGACY_NATIVE, - FMT_F, 0, 0, 0, &uv, 0); + FMT_F, 0, 0, 0, trns->dst_dict, &uv, 0); msg_enable (); out->value.f = uv.f; break; diff --git a/src/ui/gui/find-dialog.c b/src/ui/gui/find-dialog.c index 86d29583..f24bdbd0 100644 --- a/src/ui/gui/find-dialog.c +++ b/src/ui/gui/find-dialog.c @@ -599,6 +599,7 @@ value_comparator_create (const struct variable *var, const char *target) LEGACY_NATIVE, fmt->type, 0, 0, 0, + NULL, &vc->pattern, width) ) { value_destroy (&vc->pattern, width); diff --git a/src/ui/gui/helper.c b/src/ui/gui/helper.c index 4ec4bb0c..d670002a 100644 --- a/src/ui/gui/helper.c +++ b/src/ui/gui/helper.c @@ -63,6 +63,7 @@ value_to_text (union value v, const PsppireDict *dict, struct fmt_spec format) gboolean text_to_value (const gchar *text, union value *v, + const PsppireDict *dict, struct fmt_spec format) { bool ok; @@ -86,6 +87,7 @@ text_to_value (const gchar *text, union value *v, msg_disable (); ok = data_in (ss_cstr (text), LEGACY_NATIVE, format.type, 0, 0, 0, + dict->dict, v, fmt_var_width (&format)); msg_enable (); diff --git a/src/ui/gui/helper.h b/src/ui/gui/helper.h index 2a7be4aa..f6c084d4 100644 --- a/src/ui/gui/helper.h +++ b/src/ui/gui/helper.h @@ -38,6 +38,7 @@ gchar * value_to_text (union value v, const PsppireDict *dict, struct fmt_spec f gboolean text_to_value (const gchar *text, union value *v, + const PsppireDict *dict, struct fmt_spec format); GObject *get_object_assert (GtkBuilder *builder, const gchar *name, GType type); diff --git a/src/ui/gui/missing-val-dialog.c b/src/ui/gui/missing-val-dialog.c index 9230b8a6..16a7f6f3 100644 --- a/src/ui/gui/missing-val-dialog.c +++ b/src/ui/gui/missing-val-dialog.c @@ -100,7 +100,8 @@ missing_val_dialog_accept (GtkWidget *w, gpointer data) continue; } - if ( text_to_value (text, &v, *write_spec)) + if ( text_to_value (text, &v, + dialog->dict, *write_spec)) { nvals++; mv_add_value (&dialog->mvl, &v); @@ -126,9 +127,9 @@ missing_val_dialog_accept (GtkWidget *w, gpointer data) const gchar *low_text = gtk_entry_get_text (GTK_ENTRY (dialog->low)); const gchar *high_text = gtk_entry_get_text (GTK_ENTRY (dialog->high)); - if ( text_to_value (low_text, &low_val, *write_spec) + if ( text_to_value (low_text, &low_val, dialog->dict, *write_spec) && - text_to_value (high_text, &high_val, *write_spec) ) + text_to_value (high_text, &high_val, dialog->dict, *write_spec) ) { if ( low_val.f > high_val.f ) { @@ -154,6 +155,7 @@ missing_val_dialog_accept (GtkWidget *w, gpointer data) { union value discrete_val; if ( !text_to_value (discrete_text, &discrete_val, + dialog->dict, *write_spec)) { err_dialog (_("Incorrect value for variable type"), diff --git a/src/ui/gui/psppire-data-store.c b/src/ui/gui/psppire-data-store.c index a97cbbb3..9833fb49 100644 --- a/src/ui/gui/psppire-data-store.c +++ b/src/ui/gui/psppire-data-store.c @@ -959,7 +959,8 @@ psppire_data_store_data_in (PsppireDataStore *ds, casenumber casenum, gint idx, FALSE); value_init (&value, width); ok = (datasheet_get_value (ds->datasheet, casenum, idx, &value) - && data_in (input, dict_get_encoding (dict->dict), fmt->type, 0, 0, 0, &value, width) + && data_in (input, UTF8, fmt->type, 0, 0, 0, + dict->dict, &value, width) && datasheet_put_value (ds->datasheet, casenum, idx, &value)); value_destroy (&value, width); diff --git a/src/ui/gui/text-data-import-dialog.c b/src/ui/gui/text-data-import-dialog.c index 330d2847..4baa99e0 100644 --- a/src/ui/gui/text-data-import-dialog.c +++ b/src/ui/gui/text-data-import-dialog.c @@ -1748,7 +1748,9 @@ parse_field (struct import_assistant *ia, if (field.string != NULL) { msg_disable (); + /* FIXME: NULL should be replaced with the destination dictionary */ if (!data_in (field, LEGACY_NATIVE, in->type, 0, 0, 0, + NULL, &val, var_get_width (var))) { char fmt_string[FMT_STRING_LEN_MAX + 1]; diff --git a/src/ui/gui/val-labs-dialog.c b/src/ui/gui/val-labs-dialog.c index 92a7fe8e..4b575d97 100644 --- a/src/ui/gui/val-labs-dialog.c +++ b/src/ui/gui/val-labs-dialog.c @@ -72,6 +72,7 @@ on_label_entry_change (GtkEntry *entry, gpointer data) text = gtk_entry_get_text (GTK_ENTRY (dialog->value_entry)); text_to_value (text, &v, + dialog->var_store->dict, *var_get_write_format (dialog->pv)); @@ -142,6 +143,7 @@ on_value_entry_change (GtkEntry *entry, gpointer data) union value v; text_to_value (text, &v, + dialog->var_store->dict, *var_get_write_format (dialog->pv)); @@ -268,6 +270,7 @@ on_change (GtkWidget *w, gpointer data) union value v; text_to_value (val_text, &v, + dialog->var_store->dict, *var_get_write_format (dialog->pv)); val_labs_replace (dialog->labs, &v, @@ -292,6 +295,7 @@ on_add (GtkWidget *w, gpointer data) const gchar *text = gtk_entry_get_text (GTK_ENTRY (dialog->value_entry)); text_to_value (text, &v, + dialog->var_store->dict, *var_get_write_format (dialog->pv)); diff --git a/src/ui/syntax-gen.c b/src/ui/syntax-gen.c index f063fa39..22e717ac 100644 --- a/src/ui/syntax-gen.c +++ b/src/ui/syntax-gen.c @@ -152,8 +152,9 @@ syntax_gen_number (struct string *output, v_in.f = number; s = data_out (&v_in, "FIXME", format); msg_disable (); + /* FIXME: UTF8 encoded strings will fail here */ ok = data_in (ss_cstr (s), LEGACY_NATIVE, - format->type, false, 0, 0, &v_out, 0); + format->type, false, 0, 0, NULL, &v_out, 0); msg_enable (); if (ok && v_out.f == number) { -- 2.30.2