bool copy_input; /* If true, copy input to output. */
union value value; /* If copy_input false, recoded value. */
int width; /* If copy_input false, output value width. */
+ int ofs; /* Lexical location. */
};
/* Describes how to recode a single value or range of values into a
static bool parse_src_vars (struct lexer *, struct recode_trns *, const struct dictionary *dict);
static bool parse_mappings (struct lexer *, struct recode_trns *,
const char *dict_encoding);
-static bool parse_dst_vars (struct lexer *, struct recode_trns *, const struct dictionary *dict);
+static bool parse_dst_vars (struct lexer *, struct recode_trns *,
+ const struct dictionary *,
+ int src_start, int src_end,
+ int mappings_start, int mappings_end);
static void add_mapping (struct recode_trns *,
size_t *map_allocated, const struct map_in *);
static bool parse_map_in (struct lexer *lexer, struct map_in *, struct pool *,
enum val_type src_type, size_t max_src_width,
const char *dict_encoding);
-static void set_map_in_generic (struct map_in *, enum map_in_type);
-static void set_map_in_num (struct map_in *, enum map_in_type, double, double);
static void set_map_in_str (struct map_in *, struct pool *,
struct substring, size_t width,
const char *dict_encoding);
static bool parse_map_out (struct lexer *lexer, struct pool *, struct map_out *);
-static void set_map_out_num (struct map_out *, double);
static void set_map_out_str (struct map_out *, struct pool *,
struct substring);
-static bool enlarge_dst_widths (struct recode_trns *);
+static bool enlarge_dst_widths (struct lexer *, struct recode_trns *,
+ int dst_start, int dst_end);
static void create_dst_vars (struct recode_trns *, struct dictionary *);
static bool recode_trns_free (void *trns_);
\f
/* Parser. */
+static bool
+parse_one_recoding (struct lexer *lexer, struct dataset *ds,
+ struct recode_trns *trns)
+{
+ struct dictionary *dict = dataset_dict (ds);
+
+ /* Parse source variable names,
+ then input to output mappings,
+ then destination variable names. */
+ int src_start = lex_ofs (lexer);
+ if (!parse_src_vars (lexer, trns, dict))
+ return false;
+ int src_end = lex_ofs (lexer) - 1;
+
+ int mappings_start = lex_ofs (lexer);
+ if (!parse_mappings (lexer, trns, dict_get_encoding (dict)))
+ return false;
+ int mappings_end = lex_ofs (lexer) - 1;
+
+ int dst_start = lex_ofs (lexer);
+ if (!parse_dst_vars (lexer, trns, dict,
+ src_start, src_end, mappings_start, mappings_end))
+ return false;
+ int dst_end = lex_ofs (lexer) - 1;
+ if (dst_end < dst_start)
+ {
+ /* There was no target variable syntax, so the target variables are the
+ same as the source variables. */
+ dst_start = src_start;
+ dst_end = src_end;
+ }
+
+ /* Ensure that all the output strings are at least as wide
+ as the widest destination variable. */
+ if (trns->dst_type == VAL_STRING
+ && !enlarge_dst_widths (lexer, trns, dst_start, dst_end))
+ return false;
+
+ /* Create destination variables, if needed.
+ This must be the final step; otherwise we'd have to
+ delete destination variables on failure. */
+ trns->dst_dict = dict;
+ if (trns->src_vars != trns->dst_vars)
+ create_dst_vars (trns, dict);
+
+ /* Done. */
+ add_transformation (ds, &recode_trns_class, trns);
+ return true;
+}
+
/* Parses the RECODE transformation. */
int
cmd_recode (struct lexer *lexer, struct dataset *ds)
{
do
{
- struct dictionary *dict = dataset_dict (ds);
- struct recode_trns *trns
- = pool_create_container (struct recode_trns, pool);
-
- /* Parse source variable names,
- then input to output mappings,
- then destintation variable names. */
- if (!parse_src_vars (lexer, trns, dict)
- || !parse_mappings (lexer, trns, dict_get_encoding (dict))
- || !parse_dst_vars (lexer, trns, dict))
+ struct pool *pool = pool_create ();
+ struct recode_trns *trns = pool_alloc (pool, sizeof *trns);
+ *trns = (struct recode_trns) { .pool = pool };
+
+ if (!parse_one_recoding (lexer, ds, trns))
{
recode_trns_free (trns);
return CMD_FAILURE;
}
-
- /* Ensure that all the output strings are at least as wide
- as the widest destination variable. */
- if (trns->dst_type == VAL_STRING)
- {
- if (! enlarge_dst_widths (trns))
- {
- recode_trns_free (trns);
- return CMD_FAILURE;
- }
- }
-
- /* Create destination variables, if needed.
- This must be the final step; otherwise we'd have to
- delete destination variables on failure. */
- trns->dst_dict = dict;
- if (trns->src_vars != trns->dst_vars)
- create_dst_vars (trns, dict);
-
- /* Done. */
- add_transformation (ds, &recode_trns_class, trns);
}
while (lex_match (lexer, T_SLASH));
parse_mappings (struct lexer *lexer, struct recode_trns *trns,
const char *dict_encoding)
{
- size_t map_allocated;
- bool have_dst_type;
- size_t i;
-
/* Find length of longest source variable. */
trns->max_src_width = var_get_width (trns->src_vars[0]);
- for (i = 1; i < trns->n_vars; i++)
+ for (size_t i = 1; i < trns->n_vars; i++)
{
size_t var_width = var_get_width (trns->src_vars[i]);
if (var_width > trns->max_src_width)
}
/* Parse the mappings in parentheses. */
- trns->mappings = NULL;
- trns->n_maps = 0;
- map_allocated = 0;
- have_dst_type = false;
+ size_t map_allocated = 0;
+ bool have_dst_type = false;
if (!lex_force_match (lexer, T_LPAREN))
return false;
do
if (!lex_match_id (lexer, "CONVERT"))
{
- struct map_out out;
- size_t first_map_idx;
- size_t i;
-
- first_map_idx = trns->n_maps;
+ size_t first_map_idx = trns->n_maps;
/* Parse source specifications. */
do
}
while (!lex_match (lexer, T_EQUALS));
+ struct map_out out;
if (!parse_map_out (lexer, trns->pool, &out))
return false;
- if (out.copy_input)
- dst_type = trns->src_type;
- else
- dst_type = val_type_from_width (out.width);
- if (have_dst_type && dst_type != trns->dst_type)
- {
- msg (SE, _("Inconsistent target variable types. "
- "Target variables "
- "must be all numeric or all string."));
- return false;
- }
-
- for (i = first_map_idx; i < trns->n_maps; i++)
+ dst_type = (out.copy_input
+ ? trns->src_type
+ : val_type_from_width (out.width));
+ for (size_t i = first_map_idx; i < trns->n_maps; i++)
trns->mappings[i].out = out;
}
else
{
/* Parse CONVERT as a special case. */
- struct map_in in;
- set_map_in_generic (&in, MAP_CONVERT);
+ struct map_in in = { .type = MAP_CONVERT };
add_mapping (trns, &map_allocated, &in);
- set_map_out_num (&trns->mappings[trns->n_maps - 1].out, 0.0);
+
+ int ofs = lex_ofs (lexer) - 1;
+ trns->mappings[trns->n_maps - 1].out = (struct map_out) {
+ .ofs = ofs,
+ };
dst_type = VAL_NUMERIC;
- if (trns->src_type != VAL_STRING
- || (have_dst_type && trns->dst_type != VAL_NUMERIC))
+ if (trns->src_type != VAL_STRING)
{
- lex_next_error (lexer, -1, -1,
- _("CONVERT requires string input values and "
- "numeric output values."));
+ lex_ofs_error (lexer, ofs, ofs,
+ _("CONVERT requires string input values."));
return false;
}
}
+ if (have_dst_type && dst_type != trns->dst_type)
+ {
+ msg (SE, _("Output values must be all numeric or all string."));
+
+ assert (trns->n_maps > 1);
+ const struct map_out *numeric = &trns->mappings[trns->n_maps - 2].out;
+ const struct map_out *string = &trns->mappings[trns->n_maps - 1].out;
+
+ if (trns->dst_type == VAL_STRING)
+ {
+ const struct map_out *tmp = numeric;
+ numeric = string;
+ string = tmp;
+ }
+
+ lex_ofs_msg (lexer, SN, numeric->ofs, numeric->ofs,
+ _("This output value is numeric."));
+ lex_ofs_msg (lexer, SN, string->ofs, string->ofs,
+ _("This output value is string."));
+ return false;
+ }
trns->dst_type = dst_type;
have_dst_type = true;
{
if (lex_match_id (lexer, "ELSE"))
- set_map_in_generic (in, MAP_ELSE);
+ *in = (struct map_in) { .type = MAP_ELSE };
else if (src_type == VAL_NUMERIC)
{
if (lex_match_id (lexer, "MISSING"))
- set_map_in_generic (in, MAP_MISSING);
+ *in = (struct map_in) { .type = MAP_MISSING };
else if (lex_match_id (lexer, "SYSMIS"))
- set_map_in_generic (in, MAP_SYSMIS);
+ *in = (struct map_in) { .type = MAP_SYSMIS };
else
{
double x, y;
if (!parse_num_range (lexer, &x, &y, NULL))
return false;
- set_map_in_num (in, x == y ? MAP_SINGLE : MAP_RANGE, x, y);
+ *in = (struct map_in) {
+ .type = x == y ? MAP_SINGLE : MAP_RANGE,
+ .x = { .f = x },
+ .y = { .f = y },
+ };
}
}
else
{
if (lex_match_id (lexer, "MISSING"))
- set_map_in_generic (in, MAP_MISSING);
+ *in = (struct map_in) { .type = MAP_MISSING };
else if (!lex_force_string (lexer))
return false;
else
set_map_in_str (in, pool, lex_tokss (lexer), max_src_width,
dict_encoding);
lex_get (lexer);
- if (lex_token (lexer) == T_ID
- && lex_id_match (ss_cstr ("THRU"), lex_tokss (lexer)))
+ if (lex_match_id (lexer, "THRU"))
{
- lex_error (lexer, _("%s is not allowed with string variables."),
- "THRU");
+ lex_next_error (lexer, -1, -1,
+ _("%s is not allowed with string variables."),
+ "THRU");
return false;
}
}
m->in = *in;
}
-/* Sets IN as a mapping of the given TYPE. */
-static void
-set_map_in_generic (struct map_in *in, enum map_in_type type)
-{
- in->type = type;
-}
-
-/* Sets IN as a numeric mapping of the given TYPE,
- with X and Y as the two numeric values. */
-static void
-set_map_in_num (struct map_in *in, enum map_in_type type, double x, double y)
-{
- in->type = type;
- in->x.f = x;
- in->y.f = y;
-}
-
/* Sets IN as a string mapping, with STRING as the string,
allocated from POOL. The string is padded with spaces on the
right to WIDTH characters long. */
struct substring string, size_t width,
const char *dict_encoding)
{
+ *in = (struct map_in) { .type = MAP_SINGLE };
+
char *s = recode_string (dict_encoding, "UTF-8",
ss_data (string), ss_length (string));
- in->type = MAP_SINGLE;
value_init_pool (pool, &in->x, width);
value_copy_buf_rpad (&in->x, width,
CHAR_CAST (uint8_t *, s), strlen (s), ' ');
{
if (lex_is_number (lexer))
{
- set_map_out_num (out, lex_number (lexer));
+ *out = (struct map_out) { .value = { .f = lex_number (lexer) } };
lex_get (lexer);
}
else if (lex_match_id (lexer, "SYSMIS"))
- set_map_out_num (out, SYSMIS);
+ *out = (struct map_out) { .value = { .f = SYSMIS } };
else if (lex_is_string (lexer))
{
set_map_out_str (out, pool, lex_tokss (lexer));
lex_get (lexer);
}
else if (lex_match_id (lexer, "COPY"))
- {
- out->copy_input = true;
- out->width = 0;
- }
+ *out = (struct map_out) { .copy_input = true };
else
{
lex_error (lexer, _("Syntax error expecting output value."));
return false;
}
+ out->ofs = lex_ofs (lexer) - 1;
return true;
}
-/* Sets OUT as a numeric mapping output with the given VALUE. */
-static void
-set_map_out_num (struct map_out *out, double value)
-{
- out->copy_input = false;
- out->value.f = value;
- out->width = 0;
-}
-
/* Sets OUT as a string mapping output with the given VALUE. */
static void
set_map_out_str (struct map_out *out, struct pool *pool,
length = 1;
}
- out->copy_input = false;
+ *out = (struct map_out) { .width = length };
value_init_pool (pool, &out->value, length);
memcpy (out->value.s, string, length);
- out->width = length;
}
/* Parses a set of target variables into TRNS->dst_vars and
TRNS->dst_names. */
static bool
parse_dst_vars (struct lexer *lexer, struct recode_trns *trns,
- const struct dictionary *dict)
+ const struct dictionary *dict, int src_start, int src_end,
+ int mappings_start, int mappings_end)
{
- size_t i;
-
+ int dst_start, dst_end;
if (lex_match_id (lexer, "INTO"))
{
+ dst_start = lex_ofs (lexer);
size_t n_names;
- size_t i;
-
if (!parse_mixed_vars_pool (lexer, dict, trns->pool,
&trns->dst_names, &n_names,
PV_NONE))
return false;
+ dst_end = lex_ofs (lexer) - 1;
if (n_names != trns->n_vars)
{
- msg (SE, _("%zu variable(s) cannot be recoded into "
- "%zu variable(s). Specify the same number "
- "of variables as source and target variables."),
- trns->n_vars, n_names);
+ msg (SE, _("Source and target variable counts must match."));
+ lex_ofs_msg (lexer, SN, src_start, src_end,
+ ngettext ("There is %zu source variable.",
+ "There are %zu source variables.",
+ trns->n_vars),
+ trns->n_vars);
+ lex_ofs_msg (lexer, SN, dst_start, dst_end,
+ ngettext ("There is %zu target variable.",
+ "There are %zu target variables.",
+ n_names),
+ n_names);
return false;
}
trns->dst_vars = pool_nalloc (trns->pool,
trns->n_vars, sizeof *trns->dst_vars);
- for (i = 0; i < trns->n_vars; i++)
+ for (size_t i = 0; i < trns->n_vars; i++)
{
const struct variable *v;
v = trns->dst_vars[i] = dict_lookup_var (dict, trns->dst_names[i]);
if (v == NULL && trns->dst_type == VAL_STRING)
{
- msg (SE, _("There is no variable named "
- "%s. (All string variables specified "
- "on INTO must already exist. Use the "
- "STRING command to create a string "
- "variable.)"),
- trns->dst_names[i]);
+ msg (SE, _("All string variables specified on INTO must already "
+ "exist. (Use the STRING command to create a string "
+ "variable.)"));
+ lex_ofs_msg (lexer, SN, dst_start, dst_end,
+ _("There is no variable named %s."),
+ trns->dst_names[i]);
return false;
}
}
-
}
else
{
+ dst_start = src_start;
+ dst_end = src_end;
+
trns->dst_vars = trns->src_vars;
if (trns->src_type != trns->dst_type)
{
- msg (SE, _("INTO is required with %s input values "
- "and %s output values."),
- trns->src_type == VAL_NUMERIC ? _("numeric") : _("string"),
- trns->dst_type == VAL_NUMERIC ? _("numeric") : _("string"));
+ if (trns->src_type == VAL_NUMERIC)
+ lex_ofs_error (lexer, mappings_start, mappings_end,
+ _("INTO is required with numeric input values "
+ "and string output values."));
+ else
+ lex_ofs_error (lexer, mappings_start, mappings_end,
+ _("INTO is required with string input values "
+ "and numeric output values."));
return false;
}
}
- for (i = 0; i < trns->n_vars; i++)
+ for (size_t i = 0; i < trns->n_vars; i++)
{
const struct variable *v = trns->dst_vars[i];
- if (v != NULL && var_get_type (v) != trns->dst_type)
+ if (v && var_get_type (v) != trns->dst_type)
{
if (trns->dst_type == VAL_STRING)
- msg (SE, _("Type mismatch. Cannot store string data in "
- "numeric variable %s."), var_get_name (v));
+ lex_ofs_error (lexer, dst_start, dst_end,
+ _("Type mismatch: cannot store string data in "
+ "numeric variable %s."), var_get_name (v));
else
- msg (SE, _("Type mismatch. Cannot store numeric data in "
- "string variable %s."), var_get_name (v));
+ lex_ofs_error (lexer, dst_start, dst_end,
+ _("Type mismatch: cannot store numeric data in "
+ "string variable %s."), var_get_name (v));
return false;
}
}
/* Ensures that all the output values in TRNS are as wide as the
widest destination variable. */
static bool
-enlarge_dst_widths (struct recode_trns *trns)
+enlarge_dst_widths (struct lexer *lexer, struct recode_trns *trns,
+ int dst_start, int dst_end)
{
- size_t i;
const struct variable *narrow_var = NULL;
int min_dst_width = INT_MAX;
trns->max_dst_width = 0;
- for (i = 0; i < trns->n_vars; i++)
+ for (size_t i = 0; i < trns->n_vars; i++)
{
const struct variable *v = trns->dst_vars[i];
if (var_get_width (v) > trns->max_dst_width)
}
}
- for (i = 0; i < trns->n_maps; i++)
+ for (size_t i = 0; i < trns->n_maps; i++)
{
struct map_out *out = &trns->mappings[i].out;
if (!out->copy_input)
{
if (out->width > min_dst_width)
{
- msg (ME,
- _("Cannot recode because the variable %s would require a width of %d bytes or greater, but it has a width of only %d bytes."),
- var_get_name (narrow_var), out->width, min_dst_width);
+ msg (SE, _("At least one target variable is too narrow for "
+ "the output values."));
+ lex_ofs_msg (lexer, SN, out->ofs, out->ofs,
+ _("This recoding output value has width %d."),
+ out->width);
+ lex_ofs_msg (lexer, SN, dst_start, dst_end,
+ _("Target variable %s only has width %d."),
+ var_get_name (narrow_var),
+ var_get_width (narrow_var));
return false;
}
static void
create_dst_vars (struct recode_trns *trns, struct dictionary *dict)
{
- size_t i;
-
- for (i = 0; i < trns->n_vars; i++)
+ for (size_t i = 0; i < trns->n_vars; i++)
{
const struct variable **var = &trns->dst_vars[i];
const char *name = trns->dst_names[i];
static const struct map_out *
find_src_numeric (struct recode_trns *trns, double value, const struct variable *v)
{
- struct mapping *m;
-
- for (m = trns->mappings; m < trns->mappings + trns->n_maps; m++)
+ for (struct mapping *m = trns->mappings; m < trns->mappings + trns->n_maps;
+ m++)
{
const struct map_in *in = &m->in;
const struct map_out *out = &m->out;
{
const char *encoding = dict_get_encoding (trns->dst_dict);
int width = var_get_width (src_var);
- struct mapping *m;
-
- for (m = trns->mappings; m < trns->mappings + trns->n_maps; m++)
+ for (struct mapping *m = trns->mappings; m < trns->mappings + trns->n_maps;
+ m++)
{
const struct map_in *in = &m->in;
struct map_out *out = &m->out;
recode_trns_proc (void *trns_, struct ccase **c, casenumber case_idx UNUSED)
{
struct recode_trns *trns = trns_;
- size_t i;
*c = case_unshare (*c);
- for (i = 0; i < trns->n_vars; i++)
+ for (size_t i = 0; i < trns->n_vars; i++)
{
const struct variable *src_var = trns->src_vars[i];
const struct variable *dst_var = trns->dst_vars[i];