COMB_TABLE /* Specified on TABLE= subcommand. */
};
+/* These commands combine multiple input files into a single master file. The
+ input files may merge string variables with different widths, right-padding
+ with spaces to the length of the longest. This data structure allows for
+ that. */
+struct comb_resizer
+ {
+ struct caseproto *output_proto;
+ int *indexes;
+ size_t n_indexes;
+ };
+
+static struct casereader *comb_resize (struct casereader *,
+ struct comb_resizer *);
+static void comb_resizer_destroy (struct comb_resizer *);
+
/* One FILE or TABLE subcommand. */
struct comb_file
{
bool is_minimal; /* Does 'data' have minimum BY values across
all input files? */
bool is_sorted; /* Is file presorted on the BY variables? */
+ struct comb_resizer *resizer; /* If necessary. */
/* IN subcommand. */
char *in_name;
struct subcase by_vars; /* BY variables in the output. */
struct casewriter *output; /* Destination for output. */
- /* Names of variables whose types or widths different among the files.
+ /* Names of variables whose types differ among the files.
It's OK if they're all dropped, but not otherwise. */
struct stringi_set different_types;
merge_dictionary (&proc, file);
}
+ for (size_t i = 0; i < dict_get_n_vars (proc.dict); i++)
+ {
+ const struct variable *mv = dict_get_var (proc.dict, i);
+ const char *name = var_get_name (mv);
+ int mw = var_get_width (mv);
+ if (!mw || stringi_set_contains (&proc.different_types, name))
+ continue;
+
+ for (size_t j = 0; j < proc.n_files; j++)
+ {
+ struct comb_file *cf = &proc.files[j];
+ struct variable *dv = dict_lookup_var (cf->dict, name);
+ if (!dv)
+ continue;
+
+ int dw = var_get_width (dv);
+ assert (dw <= mw);
+ if (dw < mw)
+ {
+ struct comb_resizer *r = cf->resizer;
+ if (!r)
+ {
+ r = xmalloc (sizeof *r);
+ *r = (struct comb_resizer) {
+ .output_proto = caseproto_ref (dict_get_proto (proc.dict)),
+ .indexes = xnmalloc (dict_get_n_vars (cf->dict),
+ sizeof *r->indexes),
+ };
+ cf->resizer = r;
+ }
+ r->indexes[r->n_indexes++] = var_get_dict_index (dv);
+ }
+ }
+ }
+
while (lex_token (lexer) != T_ENDCMD)
{
if (lex_match (lexer, T_BY))
const char *name = var_get_name (by_vars[j]);
struct variable *var = dict_lookup_var (file->dict, name);
if (var != NULL)
- subcase_add_var (&file->by_vars, var,
- subcase_get_direction (&proc.by_vars, j));
+ subcase_add (&file->by_vars, var_get_dict_index (var),
+ subcase_get_width (&proc.by_vars, j),
+ subcase_get_direction (&proc.by_vars, j));
else
{
const char *fn
else
file->reader = casereader_clone (active_file);
}
+ if (file->resizer)
+ {
+ file->reader = comb_resize (file->reader, file->resizer);
+ file->resizer = NULL;
+ }
if (!file->is_sorted)
file->reader = sort_execute (file->reader, &file->by_vars);
taint_propagate (casereader_get_taint (file->reader), taint);
if (!mv)
mv = dict_clone_var_assert (m, dv);
- else if (var_get_width (mv) == var_get_width (dv))
+ else if (var_get_type (mv) == var_get_type (dv))
{
+ if (var_get_width (dv) > var_get_width (mv))
+ var_set_width (mv, var_get_width (dv));
+
if (var_has_value_labels (dv) && !var_has_value_labels (mv))
var_set_value_labels (mv, var_get_value_labels (dv));
if (var_has_missing_values (dv) && !var_has_missing_values (mv))
different_types_error (struct comb_proc *proc,
struct lexer *lexer, const char *var_name)
{
- msg (SE, _("Variable %s has different type or width in different "
- "files."), var_name);
+ msg (SE, _("Variable %s has different types in different files."), var_name);
for (size_t i = 0; i < proc->n_files; i++)
{
const struct comb_file *ef = &proc->files[i];
fn, var_name);
else
lex_ofs_msg (lexer, SN, ef->start_ofs, ef->end_ofs,
- _("In file %s, %s is a string with width %d."),
- fn, var_name, var_get_width (ev));
+ _("In file %s, %s is a string."), fn, var_name);
}
}
dict_unref (file->dict);
casereader_destroy (file->reader);
case_unref (file->data);
+ comb_resizer_destroy (file->resizer);
free (file->in_name);
}
free (proc->files);
proc->buffered_case = NULL;
}
}
+\f
+static void
+comb_resizer_destroy (struct comb_resizer *r)
+{
+ if (!r)
+ return;
+
+ caseproto_unref (r->output_proto);
+ free (r->indexes);
+ free (r);
+}
+
+static struct ccase *
+comb_resize_translate (struct ccase *c, void *r_)
+{
+ struct comb_resizer *r = r_;
+
+ c = case_unshare (c);
+
+ for (size_t i = 0; i < r->n_indexes; i++)
+ {
+ int idx = r->indexes[i];
+ value_resize (&c->values[idx],
+ caseproto_get_width (c->proto, idx),
+ caseproto_get_width (r->output_proto, idx));
+ }
+
+ caseproto_unref (c->proto);
+ c->proto = caseproto_ref (r->output_proto);
+
+ return c;
+}
+
+static bool
+comb_resizer_translate_destroy (void *r_)
+{
+ struct comb_resizer *r = r_;
+
+ comb_resizer_destroy (r);
+ return true;
+}
+
+static struct casereader *
+comb_resize (struct casereader *subreader, struct comb_resizer *r)
+{
+ static struct casereader_translator_class class = {
+ .translate = comb_resize_translate,
+ .destroy = comb_resizer_translate_destroy,
+ };
+
+ return casereader_translate_stateless (subreader, r->output_proto,
+ &class, r);
+}
])
AT_CLEANUP
-dnl Tests for a bug that caused MATCH FILES to crash
-dnl with incompatible variables, especially but not
-dnl exclusively when one variable came from the active
-dnl file.
-AT_SETUP([MATCH FILES with incompatible variable types])
+AT_SETUP([MATCH FILES with different string widths])
AT_DATA([match-files.sps], [dnl
DATA LIST LIST NOTABLE/name (A6) x.
BEGIN DATA.
MATCH FILES/FILE='x.sav'/FILE=*/BY name.
LIST.
])
-AT_CHECK([pspp -O format=csv match-files.sps], [1], [dnl
-match-files.sps:15: error: MATCH FILES: Variable name has different type or width in different files.
-
-"match-files.sps:15.13-15.24: note: MATCH FILES: In file `x.sav', name is a string with width 6.
- 15 | MATCH FILES/FILE='x.sav'/FILE=*/BY name.
- | ^~~~~~~~~~~~"
-
-"match-files.sps:15.26-15.31: note: MATCH FILES: In file *, name is a string with width 7.
- 15 | MATCH FILES/FILE='x.sav'/FILE=*/BY name.
- | ^~~~~~"
-
-match-files.sps:16: error: Stopping syntax file processing here to avoid a cascade of dependent command failures.
+AT_CHECK([pspp -O format=csv match-files.sps], [0], [dnl
+Table: Data List
+name,x,y
+al,7.00,1.00
+brad,8.00,. @&t@
+carl,9.00,2.00
+dan,. ,3.00
])
AT_CLEANUP
carl,2
dan,3
END DATA.
-MATCH FILES/FILE='x.sav'/FILE=*/BY name.
+MATCH FILES/FILE='x.sav'/FILE=*/RENAME(name=x)/BY x.
MATCH FILES/FILE='x.sav'/IN=**.
MATCH FILES/FILE='x.sav'/IN=x/IN=y.
MATCH FILES/FILE='x.sav'/BY=x/BY=y.
MATCH FILES/FILE='x.sav'/FILE=*/RENAME(name=name2)/IN=x.
MATCH FILES/KEEP=**.
-* The different 'name' variables are a problem, but not if we drop them.
-MATCH FILES/FILE='x.sav'/FILE=*/RENAME(y=x).
-MATCH FILES/FILE='x.sav'/FILE=*/RENAME(y=x)/DROP=name.
+* It's a problem to have different types and the same name,
+but not if we drop them.
+MATCH FILES/FILE='x.sav'/FILE=*/RENAME(name=x).
+MATCH FILES/FILE='x.sav'/FILE=*/RENAME(name=x)/DROP=x.
])
AT_CHECK([pspp --testing-mode -O format=csv insert.sps], [1], [dnl
"match-files.sps:1.18: error: MATCH FILES: Cannot specify the active dataset since none has been defined.
12 | MATCH FILES/FILE=*.
| ^"
-match-files.sps:20: error: MATCH FILES: Variable name has different type or width in different files.
+match-files.sps:20: error: MATCH FILES: Variable x has different types in different files.
-"match-files.sps:20.13-20.24: note: MATCH FILES: In file `x.sav', name is a string with width 6.
- 20 | MATCH FILES/FILE='x.sav'/FILE=*/BY name.
+"match-files.sps:20.13-20.24: note: MATCH FILES: In file `x.sav', x is numeric.
+ 20 | MATCH FILES/FILE='x.sav'/FILE=*/RENAME(name=x)/BY x.
| ^~~~~~~~~~~~"
-"match-files.sps:20.26-20.31: note: MATCH FILES: In file *, name is a string with width 7.
- 20 | MATCH FILES/FILE='x.sav'/FILE=*/BY name.
+"match-files.sps:20.26-20.31: note: MATCH FILES: In file *, x is a string.
+ 20 | MATCH FILES/FILE='x.sav'/FILE=*/RENAME(name=x)/BY x.
| ^~~~~~"
"match-files.sps:21.29-21.30: error: MATCH FILES: Syntax error expecting identifier.
40 | MATCH FILES/KEEP=**.
| ^~~~"
-match-files.sps:43: error: MATCH FILES: Variable name has different type or width in different files.
+match-files.sps:44: error: MATCH FILES: Variable x has different types in different files.
-"match-files.sps:43.13-43.24: note: MATCH FILES: In file `x.sav', name is a string with width 6.
- 43 | MATCH FILES/FILE='x.sav'/FILE=*/RENAME(y=x).
+"match-files.sps:44.13-44.24: note: MATCH FILES: In file `x.sav', x is numeric.
+ 44 | MATCH FILES/FILE='x.sav'/FILE=*/RENAME(name=x).
| ^~~~~~~~~~~~"
-"match-files.sps:43.26-43.31: note: MATCH FILES: In file *, name is a string with width 7.
- 43 | MATCH FILES/FILE='x.sav'/FILE=*/RENAME(y=x).
+"match-files.sps:44.26-44.31: note: MATCH FILES: In file *, x is a string.
+ 44 | MATCH FILES/FILE='x.sav'/FILE=*/RENAME(name=x).
| ^~~~~~"
])
AT_CLEANUP