- if (open_active_file)
- proc_commit (ds);
-
- proc_set_active_file (ds, casewriter_make_reader (mtf.output), mtf.dict);
- mtf.dict = NULL;
- mtf.output = NULL;
-
- return mtf_free (&mtf) ? CMD_SUCCESS : CMD_CASCADING_FAILURE;
-
- error:
- if (open_active_file)
- proc_commit (ds);
- mtf_free (&mtf);
- return CMD_CASCADING_FAILURE;
-}
-
-/* Describes V's type, and for a string variable also its width, in a
-   newly allocated string (obtained from xstrdup or xasprintf).
-   The result is allocated, so the caller is presumably expected to
-   free it. */
-static char *
-var_type_description (struct variable *v)
-{
-  return (var_is_numeric (v)
-          ? xstrdup ("numeric")
-          : xasprintf ("string with width %d", var_get_width (v)));
-}
-
-/* Destroys FILE's reader and releases everything FILE owns: its BY
-   array, its IN variable name, its input case, and FILE itself.
-   FILE's dictionary is destroyed too, unless FILE is flagged as the
-   active file.
-   Returns the result of destroying the reader: true if successful,
-   false if an I/O error occurred on FILE. */
-static bool
-mtf_close_file (struct mtf_file *file)
-{
-  bool reader_ok = casereader_destroy (file->reader);
-
-  free (file->by);
-  free (file->in_name);
-  if (!file->active_file)
-    dict_destroy (file->dict);
-  case_destroy (&file->input);
-  free (file);
-
-  return reader_ok;
-}
-
-/* Closes every file on MTF's chain with mtf_close_file() and leaves
-   the chain's head null.  All files are closed even if closing an
-   earlier one fails.
-   Returns true if every file closed successfully, false otherwise. */
-static bool
-mtf_close_all_files (struct mtf_proc *mtf)
-{
-  struct mtf_file *file = mtf->head;
-  bool ok = true;
-
-  while (file != NULL)
-    {
-      /* Fetch the successor first: closing FILE frees it. */
-      struct mtf_file *following = file->next;
-
-      assert (file->dict != mtf->dict);
-      ok = mtf_close_file (file) && ok;
-      file = following;
-    }
-  mtf->head = NULL;
-
-  return ok;
-}
-
-/* Releases everything held by the MATCH FILES procedure MTF: all of
-   its input files, its output writer, its dictionary, its output
-   case, and its sequence number array.
-   Returns true if successful, false if an I/O error occurred while
-   closing the input files. */
-static bool
-mtf_free (struct mtf_proc *mtf)
-{
-  bool files_ok = mtf_close_all_files (mtf);
-
-  casewriter_destroy (mtf->output);
-  dict_destroy (mtf->dict);
-  case_destroy (&mtf->mtf_case);
-  free (mtf->seq_nums);
-
-  return files_ok;
-}
-
-/* Unlinks *FILE from MTF's chain of files and closes it.  On return
-   *FILE points to the next file in the chain, or is NULL if the
-   removed file was the last in the chain.
-
-   Before the file is closed, its contribution to MTF's output case
-   is blanked: its IN variable (if any) is set to 0, and every master
-   variable fed by this file is set to SYSMIS (numeric) or spaces
-   (string).
-
-   Returns true if successful, false if an I/O error occurred. */
-static bool
-mtf_delete_file_in_place (struct mtf_proc *mtf, struct mtf_file **file)
-{
-  struct mtf_file *doomed = *file;
-  struct mtf_file *before = doomed->prev;
-  struct mtf_file *after = doomed->next;
-  int idx;
-
-  /* Splice DOOMED out of the doubly linked chain. */
-  if (before != NULL)
-    before->next = after;
-  if (after != NULL)
-    after->prev = before;
-  if (mtf->head == doomed)
-    mtf->head = after;
-  if (mtf->tail == doomed)
-    mtf->tail = before;
-  *file = after;
-
-  /* Blank out the values that DOOMED supplied to the output case. */
-  if (doomed->in_var != NULL)
-    case_data_rw (&mtf->mtf_case, doomed->in_var)->f = 0.;
-  for (idx = 0; idx < dict_get_var_cnt (doomed->dict); idx++)
-    {
-      struct variable *src = dict_get_var (doomed->dict, idx);
-      struct variable *master = get_master (src);
-      union value *dst;
-
-      if (master == NULL)
-        continue;
-
-      dst = case_data_rw (&mtf->mtf_case, master);
-      if (var_is_numeric (src))
-        dst->f = SYSMIS;
-      else
-        memset (dst->s, ' ', var_get_width (src));
-    }
-
-  return mtf_close_file (doomed);
-}
-
-/* Reads one record from each input file on MTF's chain.  A file that
-   has no record to supply is removed from the chain.
-   Returns true if successful, false if an I/O error occurred, in
-   which case the remaining files are not read. */
-static bool
-mtf_read_records (struct mtf_proc *mtf)
-{
-  struct mtf_file *file = mtf->head;
-
-  while (file != NULL)
-    {
-      /* Fetch the successor first: FILE may be deleted below. */
-      struct mtf_file *following = file->next;
-
-      if (!casereader_read (file->reader, &file->input)
-          && !mtf_delete_file_in_place (mtf, &file))
-        return false;
-
-      file = following;
-    }
-
-  return true;
-}
-
-/* Compares the BY variables in the current input cases of files A
-   and B, using MTF's count of BY variables.
-   Returns -1 if A < B, 0 if A == B, 1 if A > B. */
-static inline int
-mtf_compare_BY_values (struct mtf_proc *mtf,
-                       struct mtf_file *a, struct mtf_file *b)
-{
-  int cmp = case_compare_2dict (&a->input, &b->input,
-                                a->by, b->by, mtf->by_cnt);
-  return cmp;
-}
-
-/* Perform one iteration of steps 3...7 above: find the input
- record(s) with the minimum BY values, store their values into
- MTF's output case, write a copy of that case to the output, and
- read the next record from each FILE whose record was used.
- Returns true if successful, false if an I/O error occurred
- (that is, if mtf_delete_file_in_place() fails while dropping an
- exhausted input). */
-static bool
-mtf_processing (struct mtf_proc *mtf)
-{
- struct mtf_file *min_head, *min_tail; /* Files with minimum BY values (linked through next_min). */
- struct mtf_file *max_head, *max_tail; /* Files with non-minimum BYs (linked through next_min). */
- struct mtf_file *iter, *next;
- struct ccase out_case;
-
- /* 3. Find the FILE input record(s) that have minimum BY
- values. Store all the values from these input records into
- the output record.
-
- The head of the chain is taken as the initial minimum; the
- loop then walks the following entries for as long as they are
- of type MTF_FILE.
- NOTE(review): the head is used without checking that it is
- non-null or a FILE -- presumably the caller guarantees this;
- confirm. */
- min_head = min_tail = mtf->head;
- max_head = max_tail = NULL;
- for (iter = mtf->head->next; iter && iter->type == MTF_FILE;
- iter = iter->next)
- {
- int cmp = mtf_compare_BY_values (mtf, min_head, iter);
- if (cmp < 0) /* Current minimum sorts before ITER: ITER is unused this round. */
- {
- if (max_head)
- max_tail = max_tail->next_min = iter;
- else
- max_head = max_tail = iter;
- }
- else if (cmp == 0) /* ITER ties with the current minimum. */
- min_tail = min_tail->next_min = iter;
- else /* cmp > 0: ITER becomes the sole minimum, and the whole
- former minimum list is moved onto the end of the
- non-minimum list. */
- {
- if (max_head)
- {
- max_tail->next_min = min_head;
- max_tail = min_tail;
- }
- else
- {
- max_head = min_head;
- max_tail = min_tail;
- }
- min_head = min_tail = iter;
- }
- }
-
- /* 4. For every TABLE, read another record as long as the BY
- values on the TABLE's input record are less than the FILEs'
- BY values. If an exact match is found, store all the values
- from the TABLE input record into the output record.
-
- ITER continues from where step 3 stopped, so everything from
- here to the end of the chain is expected to be a TABLE (see
- the assertion). */
- for (; iter != NULL; iter = next)
- {
- assert (iter->type == MTF_TABLE);
-
- next = iter->next; /* Saved now because ITER may be deleted below. */
- for (;;)
- {
- int cmp = mtf_compare_BY_values (mtf, min_head, iter);
- if (cmp < 0) /* TABLE record is beyond the minimum: unused this round. */
- {
- if (max_head)
- max_tail = max_tail->next_min = iter;
- else
- max_head = max_tail = iter;
- }
- else if (cmp == 0) /* Exact match: use the TABLE record. */
- min_tail = min_tail->next_min = iter;
- else /* cmp > 0: the TABLE record sorts before the minimum, so
- discard it and compare again with the TABLE's next
- record; if there is none, drop the TABLE. */
- {
- case_destroy (&iter->input);
- if (casereader_read (iter->reader, &iter->input))
- continue;
- if (!mtf_delete_file_in_place (mtf, &iter))
- return false;
- }
- break;
- }
- }
-
- /* Next sequence number. An entry of seq_nums[] equal to seq_num
- marks a master variable that has already been stored during
- this iteration. */
- mtf->seq_num++;
-
- /* Store data to all the records we are using. Files are visited
- in minimum-list order, and a master variable already stamped
- with this iteration's sequence number is skipped, so the first
- file in the list that supplies a variable determines its
- value. Each used file's IN variable, if any, is set to 1. */
- if (min_tail)
- min_tail->next_min = NULL;
- for (iter = min_head; iter; iter = iter->next_min)
- {
- int i;
-
- for (i = 0; i < dict_get_var_cnt (iter->dict); i++)
- {
- struct variable *v = dict_get_var (iter->dict, i);
- struct variable *mv = get_master (v);
- size_t mv_index = mv ? var_get_dict_index (mv) : 0; /* 0 is a placeholder, unused when MV is null. */
-
- if (mv != NULL && mtf->seq_nums[mv_index] != mtf->seq_num)
- {
- union value *out = case_data_rw (&mtf->mtf_case, mv);
-
- mtf->seq_nums[mv_index] = mtf->seq_num;
- if (var_is_numeric (v))
- out->f = case_num (&iter->input, v);
- else
- memcpy (out->s, case_str (&iter->input, v), var_get_width (v));
- }
- }
- if (iter->in_var != NULL)
- case_data_rw (&mtf->mtf_case, iter->in_var)->f = 1.;
- }
-
- /* Store missing values to all the records we're not using. Only
- master variables not already stored above are touched: numeric
- ones become SYSMIS and string ones are filled with spaces.
- Each unused file's IN variable, if any, is set to 0. */
- if (max_tail)
- max_tail->next_min = NULL;
- for (iter = max_head; iter; iter = iter->next_min)
- {
- int i;
-
- for (i = 0; i < dict_get_var_cnt (iter->dict); i++)
- {
- struct variable *v = dict_get_var (iter->dict, i);
- struct variable *mv = get_master (v);
- size_t mv_index = mv ? var_get_dict_index (mv) : 0; /* 0 is a placeholder, unused when MV is null. */
-
- if (mv != NULL && mtf->seq_nums[mv_index] != mtf->seq_num)
- {
- union value *out = case_data_rw (&mtf->mtf_case, mv);
- mtf->seq_nums[mv_index] = mtf->seq_num;
-
- if (var_is_numeric (v))
- out->f = SYSMIS;
- else
- memset (out->s, ' ', var_get_width (v));
- }
- }
- if (iter->in_var != NULL)
- case_data_rw (&mtf->mtf_case, iter->in_var)->f = 0.;
- }
-
- /* 5. Write the output record. The writer is handed a clone, so
- mtf_case itself stays available for the next iteration. */
- case_clone (&out_case, &mtf->mtf_case);
- casewriter_write (mtf->output, &out_case);
-
- /* 6. Read another record from each input file FILE and TABLE
- that we stored values from above. If we come to the end of
- one of the input files, remove it from the list of input
- files.
- NOTE(review): the loop condition stops at the first entry on
- the minimum list that is not an MTF_FILE, so only FILE inputs
- are advanced here; TABLE inputs are advanced only by step 4. */
- for (iter = min_head; iter && iter->type == MTF_FILE; iter = next)
- {
- next = iter->next_min; /* Saved now because ITER may be deleted below. */
- case_destroy (&iter->input);
- if (!casereader_read (iter->reader, &iter->input))
- if (!mtf_delete_file_in_place (mtf, &iter))
- return false;
- }
- return true;
-}
-
-/* Merge the dictionary for file F into master dictionary M.
-
-   M takes F's file label if it has none of its own, and F's
-   documents are appended to M's.  Every non-scratch variable in F
-   is then either cloned into M (when M has no variable of that
-   name) or used to fill in value labels, missing values, and a
-   variable label that M's variable of the same name lacks.
-
-   Returns 1 if successful.  Returns 0, after issuing an error
-   message, if a variable in F has a different width from the
-   variable of the same name already in M. */
-static int
-mtf_merge_dictionary (struct dictionary *const m, struct mtf_file *f)
-{
-  struct dictionary *d = f->dict;
-  const char *d_docs, *m_docs;
-  int i;
-
-  if (dict_get_label (m) == NULL)
-    dict_set_label (m, dict_get_label (d));
-
-  d_docs = dict_get_documents (d);
-  m_docs = dict_get_documents (m);
-  if (d_docs != NULL)
-    {
-      if (m_docs == NULL)
-        dict_set_documents (m, d_docs);
-      else
-        {
-          char *new_docs = xasprintf ("%s%s", m_docs, d_docs);
-          dict_set_documents (m, new_docs);
-          free (new_docs);
-        }
-    }
-
-  for (i = 0; i < dict_get_var_cnt (d); i++)
-    {
-      struct variable *dv = dict_get_var (d, i);
-      struct variable *mv;
-
-      /* Scratch variables never reach the master dictionary. */
-      if (dict_class_from_id (var_get_name (dv)) == DC_SCRATCH)
-        continue;
-
-      mv = dict_lookup_var (m, var_get_name (dv));
-      if (mv == NULL)
-        {
-          dict_clone_var_assert (m, dv, var_get_name (dv));
-          continue;
-        }
-
-      if (var_get_width (mv) != var_get_width (dv))
-        {
-          /* var_type_description() returns allocated strings, so
-             keep hold of them and free them once the message has
-             been issued instead of leaking them. */
-          char *dv_description = var_type_description (dv);
-          char *mv_description = var_type_description (mv);
-
-          msg (SE, _("Variable %s in file %s (%s) has different "
-                     "type or width from the same variable in "
-                     "earlier file (%s)."),
-               var_get_name (dv), fh_get_name (f->handle),
-               dv_description, mv_description);
-
-          free (dv_description);
-          free (mv_description);
-          return 0;
-        }
-
-      /* The widths are known to match from here on, so DV's
-         attributes can be copied wherever MV has none of its own. */
-      if (var_has_value_labels (dv) && !var_has_value_labels (mv))
-        var_set_value_labels (mv, var_get_value_labels (dv));
-      if (var_has_missing_values (dv) && !var_has_missing_values (mv))
-        var_set_missing_values (mv, var_get_missing_values (dv));
-      if (var_get_label (dv) && !var_get_label (mv))
-        var_set_label (mv, var_get_label (dv));
-    }
-
-  return 1;
-}
-
-/* Records MASTER as the master variable of V by attaching it to V
-   as auxiliary data, where get_master() can retrieve it.
-   (The final NULL argument is presumably an auxiliary-data
-   destructor that is not needed here -- confirm against
-   var_attach_aux().) */
-static void
-set_master (struct variable *v, struct variable *master)
-{
-  var_attach_aux (v, master, NULL);
-}
-
-/* Retrieves V's master variable, that is, the auxiliary data that
-   set_master() attached to V. */
-static struct variable *
-get_master (struct variable *v)
-{
-  struct variable *master = var_get_aux (v);
-  return master;
-}
-\f
-/* Case map.
-
- A case map copies data from a case that corresponds to one
- dictionary to a case that corresponds to a second dictionary
- derived from the first by, optionally, deleting, reordering,
- or renaming variables. (No new variables may be created.)
- */