From: Ben Pfaff Date: Sun, 30 Apr 2023 00:47:05 +0000 (-0700) Subject: combine-files: Only give an error for different types if they're kept. X-Git-Url: https://pintos-os.org/cgi-bin/gitweb.cgi?a=commitdiff_plain;h=6e30f48a8f7ee01f013318866176884e511aafe7;p=pspp combine-files: Only give an error for different types if they're kept. Until now, ADD FILES, MATCH FILES, and UPDATE always gave an error if variables in the input files had different types or widths and they weren't renamed to avoid it. This commit makes them only do that if the variables in question aren't dropped. --- diff --git a/doc/combining.texi b/doc/combining.texi index a237c1a5bd..751612a874 100644 --- a/doc/combining.texi +++ b/doc/combining.texi @@ -120,6 +120,8 @@ variable with a given name, those variables must all have the same type (numeric or string) and, for string variables, the same width. Variables are matched after renaming with the @subcmd{RENAME} subcommand. Thus, @subcmd{RENAME} can be used to resolve conflicts. +Only variables in the output file can conflict, so @subcmd{DROP} or +@subcmd{KEEP}, as described below, can also resolve a conflict. @item The variable label for each output variable is taken from the first diff --git a/src/language/commands/combine-files.c b/src/language/commands/combine-files.c index dd54f90a93..e3c8dc5093 100644 --- a/src/language/commands/combine-files.c +++ b/src/language/commands/combine-files.c @@ -38,6 +38,7 @@ #include "libpspp/i18n.h" #include "libpspp/message.h" #include "libpspp/string-array.h" +#include "libpspp/stringi-set.h" #include "libpspp/taint.h" #include "math/sort.h" @@ -97,6 +98,10 @@ struct comb_proc struct subcase by_vars; /* BY variables in the output. */ struct casewriter *output; /* Destination for output. */ + /* Names of variables whose types or widths different among the files. + It's OK if they're all dropped, but not otherwise. */ + struct stringi_set different_types; + size_t *var_sources; size_t n_var_sources, allocated_var_sources; @@ -117,8 +122,9 @@ static int combine_files (enum comb_command_type, struct lexer *, static void free_comb_proc (struct comb_proc *); static void close_all_comb_files (struct comb_proc *); -static bool merge_dictionary (struct comb_proc *, struct lexer *, - struct comb_file *); +static void merge_dictionary (struct comb_proc *, struct comb_file *); +static void different_types_error (struct comb_proc *, struct lexer *, + const char *var_name); static void execute_update (struct comb_proc *); static void execute_match_files (struct comb_proc *); @@ -154,6 +160,7 @@ combine_files (enum comb_command_type command, { struct comb_proc proc = { .dict = dict_create (get_default_encoding ()), + .different_types = STRINGI_SET_INITIALIZER (proc.different_types), }; bool saw_by = false; @@ -265,8 +272,7 @@ combine_files (enum comb_command_type command, sort_ofs = MIN (sort_ofs, lex_ofs (lexer) - 1); } - if (!merge_dictionary (&proc, lexer, file)) - goto error; + merge_dictionary (&proc, file); } while (lex_token (lexer) != T_ENDCMD) @@ -286,12 +292,24 @@ combine_files (enum comb_command_type command, if (!parse_sort_criteria (lexer, proc.dict, &proc.by_vars, &by_vars, NULL)) goto error; + size_t n_by_vars = subcase_get_n_fields (&proc.by_vars); + + for (size_t i = 0; i < n_by_vars; i++) + { + const char *name = var_get_name (by_vars[i]); + if (stringi_set_contains (&proc.different_types, name)) + { + different_types_error (&proc, lexer, name); + free (by_vars); + goto error; + } + } bool ok = true; for (size_t i = 0; i < proc.n_files; i++) { struct comb_file *file = &proc.files[i]; - for (size_t j = 0; j < subcase_get_n_fields (&proc.by_vars); j++) + for (size_t j = 0; j < n_by_vars; j++) { const char *name = var_get_name (by_vars[j]); struct variable *var = dict_lookup_var (file->dict, name); @@ -377,6 +395,21 @@ combine_files (enum comb_command_type command, } } + if (!stringi_set_is_empty (&proc.different_types)) + { + const char *var_name; + const struct stringi_set_node *node; + bool any_errors = false; + STRINGI_SET_FOR_EACH (var_name, node, &proc.different_types) + if (dict_lookup_var (proc.dict, var_name)) + { + any_errors = true; + different_types_error (&proc, lexer, var_name); + } + if (any_errors) + goto error; + } + if (!saw_by) { if (command == COMB_UPDATE) @@ -501,9 +534,8 @@ combine_files (enum comb_command_type command, } /* Merge the dictionary for file F into master dictionary for PROC. */ -static bool -merge_dictionary (struct comb_proc *proc, struct lexer *lexer, - struct comb_file *f) +static void +merge_dictionary (struct comb_proc *proc, struct comb_file *f) { struct dictionary *m = proc->dict; struct dictionary *d = f->dict; @@ -560,34 +592,8 @@ merge_dictionary (struct comb_proc *proc, struct lexer *lexer, sizeof *proc->var_sources); proc->var_sources[proc->n_var_sources++] = f - proc->files; } - else + else if (var_get_width (mv) == var_get_width (dv)) { - if (var_get_width (mv) != var_get_width (dv)) - { - const char *var_name = var_get_name (dv); - msg (SE, _("Variable %s has different type or width in different " - "files."), var_name); - - for (size_t j = 0; j < 2; j++) - { - const struct variable *ev = !j ? mv : dv; - const struct comb_file *ef - = !j ? &proc->files[proc->var_sources[var_get_dict_index (mv)]] : f; - const char *fn = ef->handle ? fh_get_name (ef->handle) : "*"; - - if (var_is_numeric (ev)) - lex_ofs_msg (lexer, SN, ef->start_ofs, ef->end_ofs, - _("In file %s, %s is numeric."), - fn, var_name); - else - lex_ofs_msg (lexer, SN, ef->start_ofs, ef->end_ofs, - _("In file %s, %s is a string with width %d."), - fn, var_name, var_get_width (ev)); - } - - return false; - } - if (var_has_value_labels (dv) && !var_has_value_labels (mv)) var_set_value_labels (mv, var_get_value_labels (dv)); if (var_has_missing_values (dv) && !var_has_missing_values (mv)) @@ -595,9 +601,34 @@ merge_dictionary (struct comb_proc *proc, struct lexer *lexer, if (var_get_label (dv) && !var_get_label (mv)) var_set_label (mv, var_get_label (dv)); } + else + stringi_set_insert (&proc->different_types, var_get_name (mv)); } +} - return true; +static void +different_types_error (struct comb_proc *proc, + struct lexer *lexer, const char *var_name) +{ + msg (SE, _("Variable %s has different type or width in different " + "files."), var_name); + for (size_t i = 0; i < proc->n_files; i++) + { + const struct comb_file *ef = &proc->files[i]; + const struct variable *ev = dict_lookup_var (ef->dict, var_name); + if (!ev) + continue; + + const char *fn = ef->handle ? fh_get_name (ef->handle) : "*"; + if (var_is_numeric (ev)) + lex_ofs_msg (lexer, SN, ef->start_ofs, ef->end_ofs, + _("In file %s, %s is numeric."), + fn, var_name); + else + lex_ofs_msg (lexer, SN, ef->start_ofs, ef->end_ofs, + _("In file %s, %s is a string with width %d."), + fn, var_name, var_get_width (ev)); + } } /* If VAR_NAME is non-NULL, attempts to create a @@ -671,6 +702,7 @@ free_comb_proc (struct comb_proc *proc) subcase_uninit (&proc->by_vars); case_unref (proc->buffered_case); free (proc->var_sources); + stringi_set_destroy (&proc->different_types); } static bool scan_table (struct comb_file *, union value by[]); diff --git a/tests/language/commands/add-files.at b/tests/language/commands/add-files.at index 203004bdac..e84b1bd8f5 100644 --- a/tests/language/commands/add-files.at +++ b/tests/language/commands/add-files.at @@ -123,3 +123,6 @@ CHECK_ADD_FILES([interleave], [inline], [sav]) CHECK_ADD_FILES([concatenate], [sav], [sav]) CHECK_ADD_FILES([concatenate], [sav], [inline]) CHECK_ADD_FILES([concatenate], [inline], [sav]) + +dnl ADD FILES doesn't have syntax error tests because they are all covered +dnl by the tests for MATCH FILES. \ No newline at end of file diff --git a/tests/language/commands/match-files.at b/tests/language/commands/match-files.at index 74a1f3c3be..b7f5dd30d1 100644 --- a/tests/language/commands/match-files.at +++ b/tests/language/commands/match-files.at @@ -298,6 +298,10 @@ MATCH FILES/FILE='x.sav'/FILE=*/RENAME(name=name2)/FIRST=x. MATCH FILES/FILE='x.sav'/FILE=*/RENAME(name=name2)/LAST=x. MATCH FILES/FILE='x.sav'/FILE=*/RENAME(name=name2)/IN=x. MATCH FILES/KEEP=**. + +* The different 'name' variables are a problem, but not if we drop them. +MATCH FILES/FILE='x.sav'/FILE=*/RENAME(y=x). +MATCH FILES/FILE='x.sav'/FILE=*/RENAME(y=x)/DROP=name. ]) AT_CHECK([pspp --testing-mode -O format=csv insert.sps], [1], [dnl "match-files.sps:1.18: error: MATCH FILES: Cannot specify the active dataset since none has been defined. @@ -397,5 +401,15 @@ match-files.sps:20: error: MATCH FILES: Variable name has different type or widt "match-files.sps:40.13-40.16: error: MATCH FILES: Syntax error expecting FILE or TABLE. 40 | MATCH FILES/KEEP=**. | ^~~~" + +match-files.sps:43: error: MATCH FILES: Variable name has different type or width in different files. + +"match-files.sps:43.13-43.24: note: MATCH FILES: In file `x.sav', name is a string with width 6. + 43 | MATCH FILES/FILE='x.sav'/FILE=*/RENAME(y=x). + | ^~~~~~~~~~~~" + +"match-files.sps:43.26-43.31: note: MATCH FILES: In file *, name is a string with width 7. + 43 | MATCH FILES/FILE='x.sav'/FILE=*/RENAME(y=x). + | ^~~~~~" ]) AT_CLEANUP