X-Git-Url: https://pintos-os.org/cgi-bin/gitweb.cgi?a=blobdiff_plain;f=src%2Flanguage%2Fxforms%2Frecode.c;h=286b2ae6413541c2b6c3fe7b423cd5cfb95782e6;hb=fccff7b9926def9d360b317239c3b98cea7bb843;hp=e2074823f9df4185aa9db4bcd29a6c6615e25bd3;hpb=8953baa61127d6d3b91f763663ea647bf3e4e793;p=pspp diff --git a/src/language/xforms/recode.c b/src/language/xforms/recode.c index e2074823f9..286b2ae641 100644 --- a/src/language/xforms/recode.c +++ b/src/language/xforms/recode.c @@ -1,5 +1,5 @@ /* PSPP - a program for statistical analysis. - Copyright (C) 1997-9, 2000, 2009 Free Software Foundation, Inc. + Copyright (C) 1997-9, 2000, 2009, 2010, 2011 Free Software Foundation, Inc. This program is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -20,24 +20,26 @@ #include #include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#include "xalloc.h" +#include "data/case.h" +#include "data/data-in.h" +#include "data/dataset.h" +#include "data/dictionary.h" +#include "data/format.h" +#include "data/transformations.h" +#include "data/variable.h" +#include "language/command.h" +#include "language/lexer/lexer.h" +#include "language/lexer/value-parser.h" +#include "language/lexer/variable-parser.h" +#include "libpspp/assertion.h" +#include "libpspp/cast.h" +#include "libpspp/compiler.h" +#include "libpspp/i18n.h" +#include "libpspp/message.h" +#include "libpspp/pool.h" +#include "libpspp/str.h" + +#include "gl/xalloc.h" #include "gettext.h" #define _(msgid) gettext (msgid) @@ -55,26 +57,20 @@ enum map_in_type MAP_CONVERT /* "123" => 123. */ }; -/* A value involved in a RECODE mapping. */ -union recode_value - { - double f; /* Numeric. */ - char *c; /* Short or long string. */ - }; - /* Describes input values to be mapped. */ struct map_in { enum map_in_type type; /* One of MAP_*. */ - union recode_value x, y; /* Source values. */ + union value x, y; /* Source values. */ }; /* Describes the value used as output from a mapping. */ struct map_out { bool copy_input; /* If true, copy input to output. */ - union recode_value value; /* If copy_input false, recoded value. */ + union value value; /* If copy_input false, recoded value. */ int width; /* If copy_input false, output value width. */ + int ofs; /* Lexical location. */ }; /* Describes how to recode a single value or range of values into a @@ -97,89 +93,128 @@ struct recode_trns /* Variables. */ const struct variable **src_vars; /* Source variables. */ const struct variable **dst_vars; /* Destination variables. */ + const struct dictionary *dst_dict; /* Dictionary of dst_vars */ char **dst_names; /* Name of dest variables, if they're new. */ - size_t var_cnt; /* Number of variables. */ + size_t n_vars; /* Number of variables. */ /* Mappings. */ struct mapping *mappings; /* Value mappings. */ - size_t map_cnt; /* Number of mappings. */ + size_t n_maps; /* Number of mappings. */ + int max_src_width; /* Maximum width of src_vars[*]. */ + int max_dst_width; /* Maximum width of any map_out in mappings. */ }; static bool parse_src_vars (struct lexer *, struct recode_trns *, const struct dictionary *dict); -static bool parse_mappings (struct lexer *, struct recode_trns *); -static bool parse_dst_vars (struct lexer *, struct recode_trns *, const struct dictionary *dict); +static bool parse_mappings (struct lexer *, struct recode_trns *, + const char *dict_encoding); +static bool parse_dst_vars (struct lexer *, struct recode_trns *, + const struct dictionary *, + int src_start, int src_end, + int mappings_start, int mappings_end); static void add_mapping (struct recode_trns *, size_t *map_allocated, const struct map_in *); static bool parse_map_in (struct lexer *lexer, struct map_in *, struct pool *, - enum val_type src_type, size_t max_src_width); -static void set_map_in_generic (struct map_in *, enum map_in_type); -static void set_map_in_num (struct map_in *, enum map_in_type, double, double); + enum val_type src_type, size_t max_src_width, + const char *dict_encoding); static void set_map_in_str (struct map_in *, struct pool *, - const struct string *, size_t width); + struct substring, size_t width, + const char *dict_encoding); static bool parse_map_out (struct lexer *lexer, struct pool *, struct map_out *); -static void set_map_out_num (struct map_out *, double); static void set_map_out_str (struct map_out *, struct pool *, - const struct string *); + struct substring); -static void enlarge_dst_widths (struct recode_trns *); +static bool enlarge_dst_widths (struct lexer *, struct recode_trns *, + int dst_start, int dst_end); static void create_dst_vars (struct recode_trns *, struct dictionary *); -static trns_proc_func recode_trns_proc; -static trns_free_func recode_trns_free; +static bool recode_trns_free (void *trns_); + +static const struct trns_class recode_trns_class; /* Parser. */ +static bool +parse_one_recoding (struct lexer *lexer, struct dataset *ds, + struct recode_trns *trns) +{ + struct dictionary *dict = dataset_dict (ds); + + /* Parse source variable names, + then input to output mappings, + then destination variable names. */ + int src_start = lex_ofs (lexer); + if (!parse_src_vars (lexer, trns, dict)) + return false; + int src_end = lex_ofs (lexer) - 1; + + int mappings_start = lex_ofs (lexer); + if (!parse_mappings (lexer, trns, dict_get_encoding (dict))) + return false; + int mappings_end = lex_ofs (lexer) - 1; + + int dst_start = lex_ofs (lexer); + if (!parse_dst_vars (lexer, trns, dict, + src_start, src_end, mappings_start, mappings_end)) + return false; + int dst_end = lex_ofs (lexer) - 1; + if (dst_end < dst_start) + { + /* There was no target variable syntax, so the target variables are the + same as the source variables. */ + dst_start = src_start; + dst_end = src_end; + } + + /* Ensure that all the output strings are at least as wide + as the widest destination variable. */ + if (trns->dst_type == VAL_STRING + && !enlarge_dst_widths (lexer, trns, dst_start, dst_end)) + return false; + + /* Create destination variables, if needed. + This must be the final step; otherwise we'd have to + delete destination variables on failure. */ + trns->dst_dict = dict; + if (trns->src_vars != trns->dst_vars) + create_dst_vars (trns, dict); + + /* Done. */ + add_transformation (ds, &recode_trns_class, trns); + return true; +} + /* Parses the RECODE transformation. */ int cmd_recode (struct lexer *lexer, struct dataset *ds) { do { - struct recode_trns *trns - = pool_create_container (struct recode_trns, pool); - - /* Parse source variable names, - then input to output mappings, - then destintation variable names. */ - if (!parse_src_vars (lexer, trns, dataset_dict (ds) ) - || !parse_mappings (lexer, trns) - || !parse_dst_vars (lexer, trns, dataset_dict (ds))) + struct pool *pool = pool_create (); + struct recode_trns *trns = pool_alloc (pool, sizeof *trns); + *trns = (struct recode_trns) { .pool = pool }; + + if (!parse_one_recoding (lexer, ds, trns)) { recode_trns_free (trns); return CMD_FAILURE; } - - /* Ensure that all the output strings are at least as wide - as the widest destination variable. */ - if (trns->dst_type == VAL_STRING) - enlarge_dst_widths (trns); - - /* Create destination variables, if needed. - This must be the final step; otherwise we'd have to - delete destination variables on failure. */ - if (trns->src_vars != trns->dst_vars) - create_dst_vars (trns, dataset_dict (ds)); - - /* Done. */ - add_transformation (ds, - recode_trns_proc, recode_trns_free, trns); } - while (lex_match (lexer, '/')); + while (lex_match (lexer, T_SLASH)); - return lex_end_of_command (lexer); + return CMD_SUCCESS; } /* Parses a set of variables to recode into TRNS->src_vars and - TRNS->var_cnt. Sets TRNS->src_type. Returns true if + TRNS->n_vars. Sets TRNS->src_type. Returns true if successful, false on parse error. */ static bool parse_src_vars (struct lexer *lexer, struct recode_trns *trns, const struct dictionary *dict) { - if (!parse_variables_const (lexer, dict, &trns->src_vars, &trns->var_cnt, + if (!parse_variables_const (lexer, dict, &trns->src_vars, &trns->n_vars, PV_SAME_TYPE)) return false; pool_register (trns->pool, free, trns->src_vars); @@ -188,31 +223,25 @@ parse_src_vars (struct lexer *lexer, } /* Parses a set of mappings, which take the form (input=output), - into TRNS->mappings and TRNS->map_cnt. Sets TRNS->dst_type. + into TRNS->mappings and TRNS->n_maps. Sets TRNS->dst_type. Returns true if successful, false on parse error. */ static bool -parse_mappings (struct lexer *lexer, struct recode_trns *trns) +parse_mappings (struct lexer *lexer, struct recode_trns *trns, + const char *dict_encoding) { - size_t max_src_width; - size_t map_allocated; - bool have_dst_type; - size_t i; - /* Find length of longest source variable. */ - max_src_width = var_get_width (trns->src_vars[0]); - for (i = 1; i < trns->var_cnt; i++) + trns->max_src_width = var_get_width (trns->src_vars[0]); + for (size_t i = 1; i < trns->n_vars; i++) { size_t var_width = var_get_width (trns->src_vars[i]); - if (var_width > max_src_width) - max_src_width = var_width; + if (var_width > trns->max_src_width) + trns->max_src_width = var_width; } /* Parse the mappings in parentheses. */ - trns->mappings = NULL; - trns->map_cnt = 0; - map_allocated = 0; - have_dst_type = false; - if (!lex_force_match (lexer, '(')) + size_t map_allocated = 0; + bool have_dst_type = false; + if (!lex_force_match (lexer, T_LPAREN)) return false; do { @@ -220,11 +249,7 @@ parse_mappings (struct lexer *lexer, struct recode_trns *trns) if (!lex_match_id (lexer, "CONVERT")) { - struct map_out out; - size_t first_map_idx; - size_t i; - - first_map_idx = trns->map_cnt; + size_t first_map_idx = trns->n_maps; /* Parse source specifications. */ do @@ -232,55 +257,71 @@ parse_mappings (struct lexer *lexer, struct recode_trns *trns) struct map_in in; if (!parse_map_in (lexer, &in, trns->pool, - trns->src_type, max_src_width)) + trns->src_type, trns->max_src_width, + dict_encoding)) return false; add_mapping (trns, &map_allocated, &in); - lex_match (lexer, ','); + lex_match (lexer, T_COMMA); } - while (!lex_match (lexer, '=')); + while (!lex_match (lexer, T_EQUALS)); + struct map_out out; if (!parse_map_out (lexer, trns->pool, &out)) return false; - if (out.copy_input) - dst_type = trns->src_type; - else - dst_type = val_type_from_width (out.width); - if (have_dst_type && dst_type != trns->dst_type) - { - msg (SE, _("Inconsistent target variable types. " - "Target variables " - "must be all numeric or all string.")); - return false; - } - - for (i = first_map_idx; i < trns->map_cnt; i++) + dst_type = (out.copy_input + ? trns->src_type + : val_type_from_width (out.width)); + for (size_t i = first_map_idx; i < trns->n_maps; i++) trns->mappings[i].out = out; } else { /* Parse CONVERT as a special case. */ - struct map_in in; - set_map_in_generic (&in, MAP_CONVERT); + struct map_in in = { .type = MAP_CONVERT }; add_mapping (trns, &map_allocated, &in); - set_map_out_num (&trns->mappings[trns->map_cnt - 1].out, 0.0); + + int ofs = lex_ofs (lexer) - 1; + trns->mappings[trns->n_maps - 1].out = (struct map_out) { + .ofs = ofs, + }; dst_type = VAL_NUMERIC; - if (trns->src_type != VAL_STRING - || (have_dst_type && trns->dst_type != VAL_NUMERIC)) + if (trns->src_type != VAL_STRING) { - msg (SE, _("CONVERT requires string input values and " - "numeric output values.")); + lex_ofs_error (lexer, ofs, ofs, + _("CONVERT requires string input values.")); return false; } } + if (have_dst_type && dst_type != trns->dst_type) + { + msg (SE, _("Output values must be all numeric or all string.")); + + assert (trns->n_maps > 1); + const struct map_out *numeric = &trns->mappings[trns->n_maps - 2].out; + const struct map_out *string = &trns->mappings[trns->n_maps - 1].out; + + if (trns->dst_type == VAL_STRING) + { + const struct map_out *tmp = numeric; + numeric = string; + string = tmp; + } + + lex_ofs_msg (lexer, SN, numeric->ofs, numeric->ofs, + _("This output value is numeric.")); + lex_ofs_msg (lexer, SN, string->ofs, string->ofs, + _("This output value is string.")); + return false; + } trns->dst_type = dst_type; have_dst_type = true; - if (!lex_force_match (lexer, ')')) + if (!lex_force_match (lexer, T_RPAREN)) return false; } - while (lex_match (lexer, '(')); + while (lex_match (lexer, T_LPAREN)); return true; } @@ -292,41 +333,46 @@ parse_mappings (struct lexer *lexer, struct recode_trns *trns) false on parse error. */ static bool parse_map_in (struct lexer *lexer, struct map_in *in, struct pool *pool, - enum val_type src_type, size_t max_src_width) + enum val_type src_type, size_t max_src_width, + const char *dict_encoding) { if (lex_match_id (lexer, "ELSE")) - { - set_map_in_generic (in, MAP_ELSE); - } + *in = (struct map_in) { .type = MAP_ELSE }; else if (src_type == VAL_NUMERIC) { if (lex_match_id (lexer, "MISSING")) - set_map_in_generic (in, MAP_MISSING); + *in = (struct map_in) { .type = MAP_MISSING }; else if (lex_match_id (lexer, "SYSMIS")) - set_map_in_generic (in, MAP_SYSMIS); + *in = (struct map_in) { .type = MAP_SYSMIS }; else { double x, y; if (!parse_num_range (lexer, &x, &y, NULL)) return false; - set_map_in_num (in, x == y ? MAP_SINGLE : MAP_RANGE, x, y); + *in = (struct map_in) { + .type = x == y ? MAP_SINGLE : MAP_RANGE, + .x = { .f = x }, + .y = { .f = y }, + }; } } else { if (lex_match_id (lexer, "MISSING")) - set_map_in_generic (in, MAP_MISSING); + *in = (struct map_in) { .type = MAP_MISSING }; else if (!lex_force_string (lexer)) return false; - else + else { - set_map_in_str (in, pool, lex_tokstr (lexer), max_src_width); + set_map_in_str (in, pool, lex_tokss (lexer), max_src_width, + dict_encoding); lex_get (lexer); - if (lex_token (lexer) == T_ID - && lex_id_match (ss_cstr ("THRU"), ss_cstr (lex_tokid (lexer)))) + if (lex_match_id (lexer, "THRU")) { - msg (SE, _("THRU is not allowed with string variables.")); + lex_next_error (lexer, -1, -1, + _("%s is not allowed with string variables."), + "THRU"); return false; } } @@ -343,41 +389,30 @@ add_mapping (struct recode_trns *trns, size_t *map_allocated, const struct map_in *in) { struct mapping *m; - if (trns->map_cnt >= *map_allocated) + if (trns->n_maps >= *map_allocated) trns->mappings = pool_2nrealloc (trns->pool, trns->mappings, map_allocated, sizeof *trns->mappings); - m = &trns->mappings[trns->map_cnt++]; + m = &trns->mappings[trns->n_maps++]; m->in = *in; } -/* Sets IN as a mapping of the given TYPE. */ -static void -set_map_in_generic (struct map_in *in, enum map_in_type type) -{ - in->type = type; -} - -/* Sets IN as a numeric mapping of the given TYPE, - with X and Y as the two numeric values. */ -static void -set_map_in_num (struct map_in *in, enum map_in_type type, double x, double y) -{ - in->type = type; - in->x.f = x; - in->y.f = y; -} - /* Sets IN as a string mapping, with STRING as the string, allocated from POOL. The string is padded with spaces on the right to WIDTH characters long. */ static void set_map_in_str (struct map_in *in, struct pool *pool, - const struct string *string, size_t width) + struct substring string, size_t width, + const char *dict_encoding) { - in->type = MAP_SINGLE; - in->x.c = pool_alloc_unaligned (pool, width); - buf_copy_rpad (in->x.c, width, ds_data (string), ds_length (string)); + *in = (struct map_in) { .type = MAP_SINGLE }; + + char *s = recode_string (dict_encoding, "UTF-8", + ss_data (string), ss_length (string)); + value_init_pool (pool, &in->x, width); + value_copy_buf_rpad (&in->x, width, + CHAR_CAST (uint8_t *, s), strlen (s), ' '); + free (s); } /* Parses a mapping output value into OUT, allocating memory from @@ -387,121 +422,133 @@ parse_map_out (struct lexer *lexer, struct pool *pool, struct map_out *out) { if (lex_is_number (lexer)) { - set_map_out_num (out, lex_number (lexer)); + *out = (struct map_out) { .value = { .f = lex_number (lexer) } }; lex_get (lexer); } else if (lex_match_id (lexer, "SYSMIS")) - set_map_out_num (out, SYSMIS); - else if (lex_token (lexer) == T_STRING) + *out = (struct map_out) { .value = { .f = SYSMIS } }; + else if (lex_is_string (lexer)) { - set_map_out_str (out, pool, lex_tokstr (lexer)); + set_map_out_str (out, pool, lex_tokss (lexer)); lex_get (lexer); } - else if (lex_match_id (lexer, "COPY")) - { - out->copy_input = true; - out->width = 0; - } + else if (lex_match_id (lexer, "COPY")) + *out = (struct map_out) { .copy_input = true }; else { - lex_error (lexer, _("expecting output value")); + lex_error (lexer, _("Syntax error expecting output value.")); return false; } + out->ofs = lex_ofs (lexer) - 1; return true; } -/* Sets OUT as a numeric mapping output with the given VALUE. */ -static void -set_map_out_num (struct map_out *out, double value) -{ - out->copy_input = false; - out->value.f = value; - out->width = 0; -} - /* Sets OUT as a string mapping output with the given VALUE. */ static void set_map_out_str (struct map_out *out, struct pool *pool, - const struct string *value) + const struct substring value) { - const char *string = ds_data (value); - size_t length = ds_length (value); + const char *string = ss_data (value); + size_t length = ss_length (value); + + if (length == 0) + { + /* A length of 0 will yield a numeric value, which is not + what we want. */ + string = " "; + length = 1; + } - out->copy_input = false; - out->value.c = pool_alloc_unaligned (pool, length); - memcpy (out->value.c, string, length); - out->width = length; + *out = (struct map_out) { .width = length }; + value_init_pool (pool, &out->value, length); + memcpy (out->value.s, string, length); } /* Parses a set of target variables into TRNS->dst_vars and TRNS->dst_names. */ static bool parse_dst_vars (struct lexer *lexer, struct recode_trns *trns, - const struct dictionary *dict) + const struct dictionary *dict, int src_start, int src_end, + int mappings_start, int mappings_end) { - size_t i; - + int dst_start, dst_end; if (lex_match_id (lexer, "INTO")) { - size_t name_cnt; - size_t i; - + dst_start = lex_ofs (lexer); + size_t n_names; if (!parse_mixed_vars_pool (lexer, dict, trns->pool, - &trns->dst_names, &name_cnt, + &trns->dst_names, &n_names, PV_NONE)) return false; + dst_end = lex_ofs (lexer) - 1; - if (name_cnt != trns->var_cnt) + if (n_names != trns->n_vars) { - msg (SE, _("%zu variable(s) cannot be recoded into " - "%zu variable(s). Specify the same number " - "of variables as source and target variables."), - trns->var_cnt, name_cnt); + msg (SE, _("Source and target variable counts must match.")); + lex_ofs_msg (lexer, SN, src_start, src_end, + ngettext ("There is %zu source variable.", + "There are %zu source variables.", + trns->n_vars), + trns->n_vars); + lex_ofs_msg (lexer, SN, dst_start, dst_end, + ngettext ("There is %zu target variable.", + "There are %zu target variables.", + n_names), + n_names); return false; } trns->dst_vars = pool_nalloc (trns->pool, - trns->var_cnt, sizeof *trns->dst_vars); - for (i = 0; i < trns->var_cnt; i++) + trns->n_vars, sizeof *trns->dst_vars); + for (size_t i = 0; i < trns->n_vars; i++) { const struct variable *v; v = trns->dst_vars[i] = dict_lookup_var (dict, trns->dst_names[i]); if (v == NULL && trns->dst_type == VAL_STRING) { - msg (SE, _("There is no variable named " - "%s. (All string variables specified " - "on INTO must already exist. Use the " - "STRING command to create a string " - "variable.)"), - trns->dst_names[i]); + msg (SE, _("All string variables specified on INTO must already " + "exist. (Use the STRING command to create a string " + "variable.)")); + lex_ofs_msg (lexer, SN, dst_start, dst_end, + _("There is no variable named %s."), + trns->dst_names[i]); return false; } } - } else { + dst_start = src_start; + dst_end = src_end; + trns->dst_vars = trns->src_vars; if (trns->src_type != trns->dst_type) { - msg (SE, _("INTO is required with %s input values " - "and %s output values."), - trns->src_type == VAL_NUMERIC ? _("numeric") : _("string"), - trns->dst_type == VAL_NUMERIC ? _("numeric") : _("string")); + if (trns->src_type == VAL_NUMERIC) + lex_ofs_error (lexer, mappings_start, mappings_end, + _("INTO is required with numeric input values " + "and string output values.")); + else + lex_ofs_error (lexer, mappings_start, mappings_end, + _("INTO is required with string input values " + "and numeric output values.")); return false; } } - for (i = 0; i < trns->var_cnt; i++) + for (size_t i = 0; i < trns->n_vars; i++) { const struct variable *v = trns->dst_vars[i]; - if (v != NULL && var_get_type (v) != trns->dst_type) + if (v && var_get_type (v) != trns->dst_type) { - msg (SE, _("Type mismatch. Cannot store %s data in " - "%s variable %s."), - trns->dst_type == VAL_STRING ? _("string") : _("numeric"), - var_is_alpha (v) ? _("string") : _("numeric"), - var_get_name (v)); + if (trns->dst_type == VAL_STRING) + lex_ofs_error (lexer, dst_start, dst_end, + _("Type mismatch: cannot store string data in " + "numeric variable %s."), var_get_name (v)); + else + lex_ofs_error (lexer, dst_start, dst_end, + _("Type mismatch: cannot store numeric data in " + "string variable %s."), var_get_name (v)); return false; } } @@ -511,39 +558,59 @@ parse_dst_vars (struct lexer *lexer, struct recode_trns *trns, /* Ensures that all the output values in TRNS are as wide as the widest destination variable. */ -static void -enlarge_dst_widths (struct recode_trns *trns) +static bool +enlarge_dst_widths (struct lexer *lexer, struct recode_trns *trns, + int dst_start, int dst_end) { - size_t max_dst_width; - size_t i; + const struct variable *narrow_var = NULL; + int min_dst_width = INT_MAX; + trns->max_dst_width = 0; - max_dst_width = 0; - for (i = 0; i < trns->var_cnt; i++) + for (size_t i = 0; i < trns->n_vars; i++) { const struct variable *v = trns->dst_vars[i]; - if (var_get_width (v) > max_dst_width) - max_dst_width = var_get_width (v); + if (var_get_width (v) > trns->max_dst_width) + trns->max_dst_width = var_get_width (v); + + if (var_get_width (v) < min_dst_width) + { + min_dst_width = var_get_width (v); + narrow_var = v; + } } - for (i = 0; i < trns->map_cnt; i++) + for (size_t i = 0; i < trns->n_maps; i++) { struct map_out *out = &trns->mappings[i].out; - if (!out->copy_input && out->width < max_dst_width) - { - char *s = pool_alloc_unaligned (trns->pool, max_dst_width + 1); - buf_copy_rpad (s, max_dst_width + 1, out->value.c, out->width); - out->value.c = s; - } + if (!out->copy_input) + { + if (out->width > min_dst_width) + { + msg (SE, _("At least one target variable is too narrow for " + "the output values.")); + lex_ofs_msg (lexer, SN, out->ofs, out->ofs, + _("This recoding output value has width %d."), + out->width); + lex_ofs_msg (lexer, SN, dst_start, dst_end, + _("Target variable %s only has width %d."), + var_get_name (narrow_var), + var_get_width (narrow_var)); + return false; + } + + value_resize_pool (trns->pool, &out->value, + out->width, trns->max_dst_width); + } } + + return true; } /* Creates destination variables that don't already exist. */ static void create_dst_vars (struct recode_trns *trns, struct dictionary *dict) { - size_t i; - - for (i = 0; i < trns->var_cnt; i++) + for (size_t i = 0; i < trns->n_vars; i++) { const struct variable **var = &trns->dst_vars[i]; const char *name = trns->dst_names[i]; @@ -562,9 +629,8 @@ create_dst_vars (struct recode_trns *trns, struct dictionary *dict) static const struct map_out * find_src_numeric (struct recode_trns *trns, double value, const struct variable *v) { - struct mapping *m; - - for (m = trns->mappings; m < trns->mappings + trns->map_cnt; m++) + for (struct mapping *m = trns->mappings; m < trns->mappings + trns->n_maps; + m++) { const struct map_in *in = &m->in; const struct map_out *out = &m->out; @@ -576,7 +642,7 @@ find_src_numeric (struct recode_trns *trns, double value, const struct variable match = value == in->x.f; break; case MAP_MISSING: - match = var_is_num_missing (v, value, MV_ANY); + match = var_is_num_missing (v, value) != 0; break; case MAP_RANGE: match = value >= in->x.f && value <= in->y.f; @@ -601,12 +667,13 @@ find_src_numeric (struct recode_trns *trns, double value, const struct variable /* Returns the output mapping in TRNS for an input of VALUE with the given WIDTH, or a null pointer if there is no mapping. */ static const struct map_out * -find_src_string (struct recode_trns *trns, const char *value, const struct variable *src_var) +find_src_string (struct recode_trns *trns, const uint8_t *value, + const struct variable *src_var) { - struct mapping *m; + const char *encoding = dict_get_encoding (trns->dst_dict); int width = var_get_width (src_var); - - for (m = trns->mappings; m < trns->mappings + trns->map_cnt; m++) + for (struct mapping *m = trns->mappings; m < trns->mappings + trns->n_maps; + m++) { const struct map_in *in = &m->in; struct map_out *out = &m->out; @@ -615,7 +682,7 @@ find_src_string (struct recode_trns *trns, const char *value, const struct varia switch (in->type) { case MAP_SINGLE: - match = !memcmp (value, in->x.c, width); + match = !memcmp (value, in->x.s, width); break; case MAP_ELSE: match = true; @@ -623,16 +690,19 @@ find_src_string (struct recode_trns *trns, const char *value, const struct varia case MAP_CONVERT: { union value uv; + char *error; + + error = data_in (ss_buffer (CHAR_CAST_BUG (char *, value), width), + C_ENCODING, FMT_F, settings_get_fmt_settings (), + &uv, 0, encoding); + match = error == NULL; + free (error); - msg_disable (); - match = data_in (ss_buffer (value, width), LEGACY_NATIVE, - FMT_F, 0, 0, 0, &uv, 0); - msg_enable (); out->value.f = uv.f; break; } case MAP_MISSING: - match = var_is_str_missing (src_var, value, MV_ANY); + match = var_is_str_missing (src_var, value) != 0; break; default: NOT_REACHED (); @@ -646,47 +716,48 @@ find_src_string (struct recode_trns *trns, const char *value, const struct varia } /* Performs RECODE transformation. */ -static int +static enum trns_result recode_trns_proc (void *trns_, struct ccase **c, casenumber case_idx UNUSED) { struct recode_trns *trns = trns_; - size_t i; *c = case_unshare (*c); - for (i = 0; i < trns->var_cnt; i++) + for (size_t i = 0; i < trns->n_vars; i++) { const struct variable *src_var = trns->src_vars[i]; const struct variable *dst_var = trns->dst_vars[i]; - - const union value *src_data = case_data (*c, src_var); - union value *dst_data = case_data_rw (*c, dst_var); - const struct map_out *out; if (trns->src_type == VAL_NUMERIC) - out = find_src_numeric (trns, src_data->f, src_var); + out = find_src_numeric (trns, case_num (*c, src_var), src_var); else - out = find_src_string (trns, src_data->s, src_var); + out = find_src_string (trns, case_str (*c, src_var), src_var); if (trns->dst_type == VAL_NUMERIC) { + double *dst = case_num_rw (*c, dst_var); if (out != NULL) - dst_data->f = !out->copy_input ? out->value.f : src_data->f; + *dst = !out->copy_input ? out->value.f : case_num (*c, src_var); else if (trns->src_vars != trns->dst_vars) - dst_data->f = SYSMIS; + *dst = SYSMIS; } else { + char *dst = CHAR_CAST_BUG (char *, case_str_rw (*c, dst_var)); if (out != NULL) { if (!out->copy_input) - memcpy (dst_data->s, out->value.c, var_get_width (dst_var)); + memcpy (dst, out->value.s, var_get_width (dst_var)); else if (trns->src_vars != trns->dst_vars) - buf_copy_rpad (dst_data->s, var_get_width (dst_var), - src_data->s, var_get_width (src_var)); + { + union value *dst_data = case_data_rw (*c, dst_var); + const union value *src_data = case_data (*c, src_var); + value_copy_rpad (dst_data, var_get_width (dst_var), + src_data, var_get_width (src_var), ' '); + } } else if (trns->src_vars != trns->dst_vars) - memset (dst_data->s, ' ', var_get_width (dst_var)); + memset (dst, ' ', var_get_width (dst_var)); } } @@ -701,3 +772,9 @@ recode_trns_free (void *trns_) pool_destroy (trns->pool); return true; } + +static const struct trns_class recode_trns_class = { + .name = "RECODE", + .execute = recode_trns_proc, + .destroy = recode_trns_free, +};