From: Ben Pfaff Date: Sun, 3 Mar 2024 20:05:05 +0000 (-0800) Subject: AUTORECODE: Fix crash if scratch variables created before source variables. X-Git-Url: https://pintos-os.org/cgi-bin/gitweb.cgi?a=commitdiff_plain;h=591d8ce3170bfa6ec64ea5732ec77d64e9d8ade4;p=pspp AUTORECODE: Fix crash if scratch variables created before source variables. Thanks to Frans Houweling for reporting this bug. --- diff --git a/doc/transformation.texi b/doc/transformation.texi index 14d189b2df..18d7c2fa19 100644 --- a/doc/transformation.texi +++ b/doc/transformation.texi @@ -313,7 +313,10 @@ whitespace are recoded as SYSMIS. If @subcmd{/BLANK=VALID} is specified then th are allocated a value like any other. @subcmd{/BLANK} is not relevant to numeric values. @subcmd{/BLANK=VALID} is the default. -@cmd{AUTORECODE} is a procedure. It causes the data to be read. +@cmd{AUTORECODE} is a procedure. It causes the data to be read. It +ignores @cmd{TEMPORARY} (@pxref{TEMPORARY}), so that ``temporary'' +transformations become permanent. + @subsection Autorecode Example diff --git a/src/language/commands/autorecode.c b/src/language/commands/autorecode.c index 775b11e835..06fd73bdc3 100644 --- a/src/language/commands/autorecode.c +++ b/src/language/commands/autorecode.c @@ -64,7 +64,8 @@ struct arc_item struct arc_spec { int width; /* Variable width. */ - int src_idx; /* Case index of source variable. */ + const struct variable *src_var; /* Source variable. */ + size_t src_idx; /* Source variable index. */ char *src_name; /* Name of source variable. */ struct fmt_spec format; /* Print format in source variable. */ size_t dst_idx; /* Target variable. */ @@ -232,19 +233,28 @@ cmd_autorecode (struct lexer *lexer, struct dataset *ds) } } - /* Allocate all the specs and the rec_items that they point to. - - If GROUP is specified, there is only a single global rec_items, with the - maximum width 'width', and all of the specs point to it; otherwise each - spec has its own rec_items. */ - arc->specs = xmalloc (n_dsts * sizeof *arc->specs); - arc->n_specs = n_dsts; - for (size_t i = 0; i < n_dsts; i++) - { + /* It's difficult to know what the semantics of AUTORECODE with TEMPORARY + (or with scratch variables) should be, because the data read to figure + out the recoding table might be completely different from the data being + recoded. Imagine, for example, a temporary transformation like "COMPUTE + VAR=VAR+1000." or "COMPUTE VAR=0." */ + if (proc_make_temporary_transformations_permanent(ds)) + lex_ofs_msg (lexer, SW, 0, 0, + _("AUTORECODE ignores TEMPORARY. " + "Temporary transformations will be made permanent.")); + + /* Allocate all the specs and the rec_items that they point to. + + If GROUP is specified, there is only a single global rec_items, with the + maximum width 'width', and all of the specs point to it; otherwise each + spec has its own rec_items. */ + arc->specs = xmalloc(n_dsts * sizeof *arc->specs); + arc->n_specs = n_dsts; + for (size_t i = 0; i < n_dsts; i++) { struct arc_spec *spec = &arc->specs[i]; spec->width = var_get_width (src_vars[i]); - spec->src_idx = var_get_dict_index (src_vars[i]); + spec->src_var = src_vars[i]; spec->src_name = xstrdup (var_get_name (src_vars[i])); spec->format = var_get_print_format (src_vars[i]); @@ -290,7 +300,7 @@ cmd_autorecode (struct lexer *lexer, struct dataset *ds) for (size_t i = 0; i < arc->n_specs; i++) { struct arc_spec *spec = &arc->specs[i]; - const union value *value = case_data_idx (c, spec->src_idx); + const union value *value = case_data (c, spec->src_var); if (spec->width == 0 && value->f == SYSMIS) { /* AUTORECODE never changes the system-missing value. @@ -326,7 +336,7 @@ cmd_autorecode (struct lexer *lexer, struct dataset *ds) ok = proc_commit (ds) && ok; /* Re-fetch dictionary because it might have changed (if TEMPORARY was in - use). */ + use or scratch variables were used). */ dict = dataset_dict (ds); /* Create transformation. */ @@ -334,6 +344,8 @@ cmd_autorecode (struct lexer *lexer, struct dataset *ds) { struct arc_spec *spec = &arc->specs[i]; + spec->src_idx = var_get_dict_index (spec->src_var); + /* Create destination variable. */ struct variable *dst = dict_create_var_assert (dict, dst_names[i], 0); spec->dst_idx = var_get_dict_index (dst); diff --git a/tests/language/commands/autorecode.at b/tests/language/commands/autorecode.at index f639b078a4..5e4e653472 100644 --- a/tests/language/commands/autorecode.at +++ b/tests/language/commands/autorecode.at @@ -457,6 +457,10 @@ Variable,Record,Columns,Format X,1,1-5,A5 Y,1,7-7,F1.0 +"autorecode.sps:16.1-16.10: warning: AUTORECODE: AUTORECODE ignores TEMPORARY. Temporary transformations will be made permanent. + 16 | autorecode x y into A B/descend/print. + | ^~~~~~~~~~" + Table: Recoding X into A. Old Value,New Value,Value Label lajks,1,lajks @@ -470,18 +474,75 @@ Old Value,New Value,Value Label Table: Data List X,Y,A,B -lasdj,1,.,. -asdfk,0,2,. asdfj,2,3,2 -asdfj,1,3,. asdfk,2,2,2 asdfj,9,3,1 lajks,9,1,1 -asdfk,0,2,. -asdfk,1,2,. ]) AT_CLEANUP +dnl AUTORECODE had a use-after-free error when scratch variables +dnl caused the variables being recoded to change indexes. +dnl See bug report at: +dnl https://lists.gnu.org/archive/html/bug-gnu-pspp/2024-02/msg00011.html +AT_SETUP([AUTORECODE with scratch variables]) +AT_DATA([autorecode.sps], + [data list /X0 1-5(a) Y0 7. +begin data. +lasdj 1 +asdfk 0 +asdfj 2 +asdfj 1 +asdfk 2 +asdfj 9 +lajks 9 +asdfk 0 +asdfk 1 +end data. +compute #foo = 0. +compute #bar = 1. +compute #baz = 2. +compute #quux = 3. +string X(a5). +numeric Y(f1.0). +compute x = x0. +compute y = y0. +autorecode x y into A B/descend/print. +list x y a b. +]) +AT_CHECK([pspp -O format=csv autorecode.sps], [0], [dnl +Table: Reading 1 record from INLINE. +Variable,Record,Columns,Format +X0,1,1-5,A5 +Y0,1,7-7,F1.0 + +Table: Recoding X into A. +Old Value,New Value,Value Label +lasdj,1,lasdj +lajks,2,lajks +asdfk,3,asdfk +asdfj,4,asdfj + +Table: Recoding Y into B. +Old Value,New Value,Value Label +9,1,9 +2,2,2 +1,3,1 +0,4,0 + +Table: Data List +X,Y,A,B +lasdj,1,1,3 +asdfk,0,3,4 +asdfj,2,4,2 +asdfj,1,4,3 +asdfk,2,3,2 +asdfj,9,4,1 +lajks,9,2,1 +asdfk,0,3,4 +asdfk,1,3,3 +]) +AT_CLEANUP dnl For compatibility, make sure that /INTO (with leading slash) is accepted dnl (bug #48762)