From aa4573b5e5c219d27fbbec0b56b35bfdac77876a Mon Sep 17 00:00:00 2001 From: Ben Pfaff Date: Sat, 22 Dec 2012 03:45:42 -0800 Subject: [PATCH] UPDATE: Do not update from missing values in transaction files. The documentation said that missing and all-spaces values in transaction files did not update the master file, but the implementation was wrong. Reported by Kees Varekamp . --- src/language/data-io/combine-files.c | 44 ++++++++++++++++++++++++++-- tests/language/data-io/update.at | 5 ++-- 2 files changed, 44 insertions(+), 5 deletions(-) diff --git a/src/language/data-io/combine-files.c b/src/language/data-io/combine-files.c index e112bc08c0..b7ba87ab7c 100644 --- a/src/language/data-io/combine-files.c +++ b/src/language/data-io/combine-files.c @@ -69,6 +69,7 @@ struct comb_file /* Variables. */ struct subcase by_vars; /* BY variables in this input file. */ struct subcase src, dst; /* Data to copy to output; where to put it. */ + const struct missing_values **mv; /* Each variable's missing values. */ /* Input files. */ struct file_handle *handle; /* Input file handle. */ @@ -197,6 +198,7 @@ combine_files (enum comb_command_type command, subcase_init_empty (&file->by_vars); subcase_init_empty (&file->src); subcase_init_empty (&file->dst); + file->mv = NULL; file->handle = NULL; file->dict = NULL; file->reader = NULL; @@ -413,6 +415,7 @@ combine_files (enum comb_command_type command, size_t src_var_cnt = dict_get_var_cnt (file->dict); size_t j; + file->mv = xnmalloc (src_var_cnt, sizeof *file->mv); for (j = 0; j < src_var_cnt; j++) { struct variable *src_var = dict_get_var (file->dict, j); @@ -420,6 +423,8 @@ combine_files (enum comb_command_type command, var_get_name (src_var)); if (dst_var != NULL) { + size_t n = subcase_get_n_fields (&file->src); + file->mv[n] = var_get_missing_values (src_var); subcase_add_var (&file->src, src_var, SC_ASCEND); subcase_add_var (&file->dst, dst_var, SC_ASCEND); } @@ -633,6 +638,7 @@ close_all_comb_files (struct comb_proc *proc) subcase_destroy (&file->by_vars); subcase_destroy (&file->src); subcase_destroy (&file->dst); + free (file->mv); fh_unref (file->handle); dict_destroy (file->dict); casereader_destroy (file->reader); @@ -665,6 +671,7 @@ free_comb_proc (struct comb_proc *proc) static bool scan_table (struct comb_file *, union value by[]); static struct ccase *create_output_case (const struct comb_proc *); static void apply_case (const struct comb_file *, struct ccase *); +static void apply_nonmissing_case (const struct comb_file *, struct ccase *); static void advance_file (struct comb_file *, union value by[]); static void output_case (struct comb_proc *, struct ccase *, union value by[]); static void output_buffered_case (struct comb_proc *); @@ -757,7 +764,7 @@ execute_update (struct comb_proc *proc) { while (file->is_minimal) { - apply_case (file, output); + apply_nonmissing_case (file, output); advance_file (file, by); } } @@ -829,14 +836,45 @@ create_output_case (const struct comb_proc *proc) return output; } +static void +mark_file_used (const struct comb_file *file, struct ccase *output) +{ + if (file->in_var != NULL) + case_data_rw (output, file->in_var)->f = true; +} + /* Copies the data from FILE's case into output case OUTPUT. If FILE has an IN variable, then it is set to 1 in OUTPUT. */ static void apply_case (const struct comb_file *file, struct ccase *output) { subcase_copy (&file->src, file->data, &file->dst, output); - if (file->in_var != NULL) - case_data_rw (output, file->in_var)->f = true; + mark_file_used (file, output); +} + +/* Copies the data from FILE's case into output case OUTPUT, + skipping values that are missing or all spaces. + + If FILE has an IN variable, then it is set to 1 in OUTPUT. */ +static void +apply_nonmissing_case (const struct comb_file *file, struct ccase *output) +{ + size_t i; + + for (i = 0; i < subcase_get_n_fields (&file->src); i++) + { + const struct subcase_field *src_field = &file->src.fields[i]; + const struct subcase_field *dst_field = &file->dst.fields[i]; + const union value *src_value + = case_data_idx (file->data, src_field->case_index); + int width = src_field->width; + + if (!mv_is_value_missing (file->mv[i], src_value, MV_ANY) + && !(width > 0 && value_is_spaces (src_value, width))) + value_copy (case_data_rw_idx (output, dst_field->case_index), + src_value, width); + } + mark_file_used (file, output); } /* Advances FILE to its next case. If BY is nonnull, then FILE's is_minimal diff --git a/tests/language/data-io/update.at b/tests/language/data-io/update.at index 0a2ea2896f..41ccf21043 100644 --- a/tests/language/data-io/update.at +++ b/tests/language/data-io/update.at @@ -17,7 +17,7 @@ m4_define([CHECK_UPDATE], ]) AT_DATA([b.data], [dnl 1bN -3bO +3 O 4bP 6bQ 7bR @@ -44,6 +44,7 @@ UPDATE BY a. LIST. ]) + cat update.sps AT_CHECK([pspp -O format=csv update.sps], [0], [dnl update.sps:6: warning: UPDATE: Encountered 3 sets of duplicate cases in the master file. @@ -53,7 +54,7 @@ a,b,c,d,InA,InB 1,b,B,N,1,1 1,a,C,,1,0 2,a,D,,1,0 -3,b,E,O,1,1 +3,a,E,O,1,1 4,b,F,P,1,1 5,a,G,,1,0 5,a,H,,1,0 -- 2.30.2