From 335b56abf229fcb446c2311b2a64faf5f64e5a38 Mon Sep 17 00:00:00 2001 From: Ben Pfaff Date: Mon, 2 Sep 2019 06:37:02 +0000 Subject: [PATCH] AUTORECODE: Properly handle value labels. Value labels are supposed to be copied from source variables, where they are present. Also, it's better to format the source values using the print format rather than just assuming that %g is the right format. --- doc/transformation.texi | 4 ++++ src/language/stats/autorecode.c | 37 +++++++++++------------------- tests/language/stats/autorecode.at | 8 +++---- 3 files changed, 21 insertions(+), 28 deletions(-) diff --git a/doc/transformation.texi b/doc/transformation.texi index 5ee7febef0..01d26621d4 100644 --- a/doc/transformation.texi +++ b/doc/transformation.texi @@ -267,6 +267,10 @@ the non-missing values. The system-missing value is always recoded into the system-missing variable in target variables. +If a source value has a value label, then that value label is retained +for the new value in the target variable. Otherwise, the source value +itself becomes the new value's label. + @subcmd{PRINT} is currently ignored. The @subcmd{GROUP} subcommand is relevant only if more than one variable is to be diff --git a/src/language/stats/autorecode.c b/src/language/stats/autorecode.c index 14332acabe..e0b5773909 100644 --- a/src/language/stats/autorecode.c +++ b/src/language/stats/autorecode.c @@ -54,6 +54,7 @@ struct arc_item union value from; /* Original value. */ int width; /* Width of the original value */ bool missing; /* Is 'from' missing in its source varible? */ + char *value_label; /* Value label in source variable, if any. */ double to; /* Recoded value. 
*/ }; @@ -296,12 +297,19 @@ cmd_autorecode (struct lexer *lexer, struct dataset *ds) if (find_arc_item (spec->items, value, width, hash)) continue; + struct string value_label = DS_EMPTY_INITIALIZER; + var_append_value_name__ (src_vars[i], value, + SETTINGS_VALUE_SHOW_LABEL, &value_label); + struct arc_item *item = xmalloc (sizeof *item); item->width = width; value_clone (&item->from, value, width); item->missing = mv_is_value_missing_varwidth (&spec->mv, value, spec->width, MV_ANY); + item->value_label = ds_steal_cstr (&value_label); hmap_insert (&spec->items->ht, &item->hmap_node, hash); + + ds_destroy (&value_label); } bool ok = casereader_destroy (input); ok = proc_commit (ds) && ok; @@ -372,35 +380,15 @@ cmd_autorecode (struct lexer *lexer, struct dataset *ds) mv_destroy (&mv); } - /* Add value labels to the destination variable which indicate - the source value from whence the new value comes. */ + /* Add value labels to the destination variable. */ for (j = 0; j < n_items; j++) { - const union value *from = &items[j]->from; - const int src_width = items[j]->width; - char *recoded_value; - if (src_width > 0) - { - const char *str = CHAR_CAST_BUG (const char *, from->s); - - recoded_value = recode_string (UTF8, dict_get_encoding (dict), - str, src_width); - } - else - recoded_value = c_xasprintf ("%.*g", DBL_DIG + 1, from->f); - - /* Remove trailing whitespace. */ - size_t len = strlen (recoded_value); - while (len > 0 && recoded_value[len - 1] == ' ') - recoded_value[--len] = '\0'; - - /* Add value label, if it would be nonempty. */ - if (len) + const char *value_label = items[j]->value_label; + if (value_label && value_label[0]) { union value to_val = { .f = items[j]->to }; - var_add_value_label (spec->dst, &to_val, recoded_value); + var_add_value_label (spec->dst, &to_val, value_label); } - free (recoded_value); } /* Free array. 
*/ @@ -438,6 +426,7 @@ arc_free (struct autorecode_pgm *arc) &spec->items->ht) { value_destroy (&item->from, item->width); + free (item->value_label); hmap_delete (&spec->items->ht, &item->hmap_node); free (item); } diff --git a/tests/language/stats/autorecode.at b/tests/language/stats/autorecode.at index 0b688adbb0..0397e78e4a 100644 --- a/tests/language/stats/autorecode.at +++ b/tests/language/stats/autorecode.at @@ -131,11 +131,13 @@ thingummies 6 oojimiflips 7 end data. +value labels /s 'thingummies' 'Funny sticky things'. + autorecode s into new. list. -display dictionary. +display dictionary/variables=new. ]) AT_CHECK([pspp -O format=csv ar.sps], [0], @@ -151,15 +153,13 @@ oojimiflips,7.00,2.00 Table: Variables Name,Position,Label,Measurement Level,Role,Width,Alignment,Print Format,Write Format,Missing Values -s,1,,Nominal,Input,16,Left,A16,A16, -x,2,,Scale,Input,8,Right,F8.2,F8.2, new,3,,Scale,Input,8,Right,F8.2,F8.2, Table: Value Labels Variable Value,,Label new,1.00,oojars ,2.00,oojimiflips -,3.00,thingummies +,3.00,Funny sticky things ,4.00,widgets ]) -- 2.30.2