- return ok;
-}
-
-/* Frees all the data for the MATCH FILES procedure. */
-static void
-mtf_free (struct mtf_proc *mtf)
-{
- mtf_close_all_files (mtf);
- dict_destroy (mtf->dict);
- casewriter_destroy (mtf->output);
- case_destroy (&mtf->buffered_case);
- case_destroy (&mtf->prev_BY_case);
-}
-
-/* Reads the next record into FILE, if possible, and update MTF's
- nonempty_files count if not. */
-static bool
-mtf_read_record (struct mtf_proc *mtf, struct mtf_file *file)
-{
- case_destroy (&file->input);
- if (!casereader_read (file->reader, &file->input))
- {
- mtf->nonempty_files--;
- return false;
- }
- else
- return true;
-}
-
-/* Compare the BY variables for files A and B; return -1 if A <
- B, 0 if A == B, 1 if A > B. (If there are no BY variables,
- then all records are equal.) */
-static inline int
-mtf_compare_BY_values (struct mtf_proc *mtf,
- struct mtf_file *a, struct mtf_file *b)
-{
- return case_compare_2dict (&a->input, &b->input, a->by, b->by, mtf->by_cnt);
-}
-
-/* Processes input files and write one case to the output file. */
-static void
-mtf_process_case (struct mtf_proc *mtf)
-{
- struct ccase c;
- struct mtf_file *min;
- struct mtf_file *file;
- int min_sequence;
- size_t i;
-
- /* Find the set of one or more FILEs whose BY values are
- minimal, as well as the set of zero or more TABLEs whose BY
- values equal those of the minimum FILEs.
-
- After each iteration of the loop, this invariant holds: the
- FILEs with minimum BY values thus far have "sequence"
- members equal to min_sequence, and "min" points to one of
- the mtf_files whose case has those minimum BY values, and
- similarly for TABLEs. */
- min_sequence = 0;
- min = NULL;
- ll_for_each (file, struct mtf_file, ll, &mtf->files)
- if (case_is_null (&file->input))
- file->sequence = -1;
- else if (file->type == MTF_FILE)
- {
- int cmp = min != NULL ? mtf_compare_BY_values (mtf, min, file) : 1;
- if (cmp <= 0)
- file->sequence = cmp < 0 ? -1 : min_sequence;
- else
- {
- file->sequence = ++min_sequence;
- min = file;
- }
- }
- else
- {
- int cmp;
- assert (min != NULL);
- do
- {
- cmp = mtf_compare_BY_values (mtf, min, file);
- }
- while (cmp > 0 && mtf_read_record (mtf, file));
- file->sequence = cmp == 0 ? min_sequence : -1;
- }
-
- /* Form the output case from the input cases. */
- case_create (&c, dict_get_next_value_idx (mtf->dict));
- for (i = 0; i < dict_get_var_cnt (mtf->dict); i++)
- {
- struct variable *v = dict_get_var (mtf->dict, i);
- value_set_missing (case_data_rw (&c, v), var_get_width (v));
- }
- ll_for_each_reverse (file, struct mtf_file, ll, &mtf->files)
- {
- bool include_file = file->sequence == min_sequence;
- if (include_file)
- for (i = 0; i < file->var_cnt; i++)
- {
- const struct mtf_variable *mv = &file->vars[i];
- const union value *in = case_data (&file->input, mv->in_var);
- union value *out = case_data_rw (&c, mv->out_var);
- value_copy (out, in, var_get_width (mv->in_var));
- }
- if (file->in_var != NULL)
- case_data_rw (&c, file->in_var)->f = include_file;
- }
-
- /* Write the output case. */
- if (mtf->first == NULL && mtf->last == NULL)
- {
- /* With no FIRST or LAST variables, it's trivial. */
- casewriter_write (mtf->output, &c);
- }
- else
- {
- /* It's harder with LAST, because we can't know whether
- this case is the last in a group until we've prepared
- the *next* case also. Thus, we buffer the previous
- output case until the next one is ready.
-
- We also have to save a copy of one of the previous input
- cases, so that we can compare the BY variables. We
- can't compare the BY variables between the current
- output case and the saved one because the BY variables
- might not be in the output (the user is allowed to drop
- them). */
- bool new_BY;
- if (mtf->prev_BY != NULL)
- {
- new_BY = case_compare_2dict (&min->input, &mtf->prev_BY_case,
- min->by, mtf->prev_BY,
- mtf->by_cnt);
- if (mtf->last != NULL)
- case_data_rw (&mtf->buffered_case, mtf->last)->f = new_BY;
- casewriter_write (mtf->output, &mtf->buffered_case);
- }
- else
- new_BY = true;
-
- case_move (&mtf->buffered_case, &c);
- if (mtf->first != NULL)
- case_data_rw (&mtf->buffered_case, mtf->first)->f = new_BY;
-
- if (new_BY)
- {
- mtf->prev_BY = min->by;
- case_destroy (&mtf->prev_BY_case);
- case_clone (&mtf->prev_BY_case, &min->input);
- }
- }
-
- /* Read another record from each input file FILE with minimum
- values. */
- ll_for_each (file, struct mtf_file, ll, &mtf->files)
- if (file->type == MTF_FILE)
- {
- if (file->sequence == min_sequence)
- mtf_read_record (mtf, file);
- }
- else
- break;
-}
-
-/* Merge the dictionary for file F into master dictionary M. */
-static bool
-mtf_merge_dictionary (struct dictionary *const m, struct mtf_file *f)
-{
- struct dictionary *d = f->dict;
- const char *d_docs, *m_docs;
- int i;
-
- if (dict_get_label (m) == NULL)
- dict_set_label (m, dict_get_label (d));
-
- d_docs = dict_get_documents (d);
- m_docs = dict_get_documents (m);
- if (d_docs != NULL)
- {
- if (m_docs == NULL)
- dict_set_documents (m, d_docs);
- else
- {
- char *new_docs = xasprintf ("%s%s", m_docs, d_docs);
- dict_set_documents (m, new_docs);
- free (new_docs);
- }
- }
-
- for (i = 0; i < dict_get_var_cnt (d); i++)
- {
- struct variable *dv = dict_get_var (d, i);
- struct variable *mv = dict_lookup_var (m, var_get_name (dv));
-
- if (dict_class_from_id (var_get_name (dv)) == DC_SCRATCH)
- continue;
-
- if (mv != NULL)
- {
- if (var_get_width (mv) != var_get_width (dv))
- {
- char *dv_description = var_type_description (dv);
- char *mv_description = var_type_description (mv);
- msg (SE, _("Variable %s in file %s (%s) has different "
- "type or width from the same variable in "
- "earlier file (%s)."),
- var_get_name (dv), fh_get_name (f->handle),
- dv_description, mv_description);
- free (dv_description);
- free (mv_description);
- return false;
- }
-
- if (var_get_width (dv) == var_get_width (mv))
- {
- if (var_has_value_labels (dv) && !var_has_value_labels (mv))
- var_set_value_labels (mv, var_get_value_labels (dv));
- if (var_has_missing_values (dv) && !var_has_missing_values (mv))
- var_set_missing_values (mv, var_get_missing_values (dv));
- }
-
- if (var_get_label (dv) && !var_get_label (mv))
- var_set_label (mv, var_get_label (dv));
- }
- else
- mv = dict_clone_var_assert (m, dv, var_get_name (dv));
- }
-
- return true;
-}
-\f
-/* Case map.
-
- A case map copies data from a case that corresponds for one
- dictionary to a case that corresponds to a second dictionary
- derived from the first by, optionally, deleting, reordering,
- or renaming variables. (No new variables may be created.)
- */
-
-/* A case map. */
-struct case_map
- {
- size_t value_cnt; /* Number of values in map. */
- int *map; /* For each destination index, the
- corresponding source index. */
- };
-
-/* Prepares dictionary D for producing a case map. Afterward,
- the caller may delete, reorder, or rename variables within D
- at will before using finish_case_map() to produce the case
- map.