- if (!proc_close (ds))
- goto error;
-
- discard_variables (ds);
-
- dataset_set_dict (ds, mtf.dict);
- mtf.dict = NULL;
- proc_set_source (ds, storage_source_create (mtf.output));
- mtf.output = NULL;
-
- return mtf_free (&mtf) ? CMD_SUCCESS : CMD_CASCADING_FAILURE;
-
- error:
- proc_close (ds);
- mtf_free (&mtf);
- return CMD_CASCADING_FAILURE;
-}
-
-/* Describes V's type, and for a string variable also its width,
-   as text.  The result is written into one of two static buffers
-   that are used alternately, so the strings returned by the two
-   most recent calls remain valid together; the next call after
-   that overwrites the older one. */
-static char *
-var_type_description (struct variable *v)
-{
-  static char descriptions[2][32];
-  static int slot = 0;
-  char *out;
-
-  /* Flip to the buffer that was not handed out by the previous
-     call. */
-  slot = !slot;
-  out = descriptions[slot];
-
-  if (!var_is_numeric (v))
-    sprintf (out, "string with width %d", var_get_width (v));
-  else
-    strcpy (out, "numeric");
-
-  return out;
-}
-
-/* Releases everything held by FILE, then FILE itself.
-   Returns false if FILE has a reader and that reader reports an
-   error, true otherwise. */
-static bool
-mtf_close_file (struct mtf_file *file)
-{
-  bool io_ok = true;
-
-  /* The reader's error status has to be sampled before the
-     reader is closed below. */
-  if (file->reader != NULL && any_reader_error (file->reader))
-    io_ok = false;
-
-  free (file->by);
-  any_reader_close (file->reader);
-
-  /* The dictionary is destroyed only for files that have a
-     handle. */
-  if (file->handle != NULL)
-    dict_destroy (file->dict);
-
-  case_destroy (&file->input_storage);
-  free (file->in_name);
-  free (file);
-
-  return io_ok;
-}
-
-/* Releases the data owned by the MATCH FILES procedure state
-   MTF: every file on its chain, its dictionary if one is still
-   attached, its output case, and its sequence-number array.  MTF
-   itself is not freed.
-   Returns true if successful, false if closing any of the files
-   reported an I/O error. */
-static bool
-mtf_free (struct mtf_proc *mtf)
-{
-  struct mtf_file *file = mtf->head;
-  bool all_ok = true;
-
-  while (file != NULL)
-    {
-      /* Closing FILE frees it, so fetch its successor first. */
-      struct mtf_file *following = file->next;
-
-      assert (file->dict != mtf->dict);
-      all_ok = mtf_close_file (file) && all_ok;
-      file = following;
-    }
-
-  if (mtf->dict != NULL)
-    dict_destroy (mtf->dict);
-  case_destroy (&mtf->mtf_case);
-  free (mtf->seq_nums);
-
-  return all_ok;
-}
-
-/* Unlinks *FILE from MTF's chain of files and closes it.  On
-   return *FILE points to the file that followed it in the chain,
-   or is NULL if it was the last one.  In MTF's output case, the
-   removed file's in_var (if it has one) is set to 0, and for each
-   variable in the file's dictionary for which get_master yields a
-   variable, that variable is set to SYSMIS (numeric) or filled
-   with spaces (string).
-   Returns true if successful, false if an I/O error occurred. */
-static bool
-mtf_delete_file_in_place (struct mtf_proc *mtf, struct mtf_file **file)
-{
-  struct mtf_file *victim = *file;
-  struct mtf_file *before = victim->prev;
-  struct mtf_file *after = victim->next;
-  size_t var_cnt;
-  size_t idx;
-
-  /* Splice the neighbors together and fix up the chain ends. */
-  if (before != NULL)
-    before->next = after;
-  if (after != NULL)
-    after->prev = before;
-  if (mtf->head == victim)
-    mtf->head = after;
-  if (mtf->tail == victim)
-    mtf->tail = before;
-  *file = after;
-
-  if (victim->in_var != NULL)
-    case_data_rw (&mtf->mtf_case, victim->in_var)->f = 0.;
-
-  var_cnt = dict_get_var_cnt (victim->dict);
-  for (idx = 0; idx < var_cnt; idx++)
-    {
-      struct variable *var = dict_get_var (victim->dict, idx);
-      struct variable *master = get_master (var);
-      union value *dst;
-
-      if (master == NULL)
-        continue;
-
-      dst = case_data_rw (&mtf->mtf_case, master);
-      if (var_is_numeric (var))
-        dst->f = SYSMIS;
-      else
-        memset (dst->s, ' ', var_get_width (var));
-    }
-
-  return mtf_close_file (victim);
-}
-
-/* Reads a record for every input file on MTF's chain.  A file
-   with a handle is read through its reader; any other file is
-   read with proc_read from DS.  A file whose read returns false
-   is removed from the chain and closed.
-   Returns true if successful.  Returns false, without reading
-   the remaining files, as soon as removing a file reports an I/O
-   error. */
-static bool
-mtf_read_records (struct mtf_proc *mtf, struct dataset *ds)
-{
-  struct mtf_file *file = mtf->head;
-
-  while (file != NULL)
-    {
-      /* FILE may be freed below, so remember its successor. */
-      struct mtf_file *following = file->next;
-      bool got_record;
-
-      if (file->handle)
-        got_record = any_reader_read (file->reader, file->input);
-      else
-        got_record = proc_read (ds, &file->input);
-
-      if (!got_record && !mtf_delete_file_in_place (mtf, &file))
-        return false;
-
-      file = following;
-    }
-
-  return true;
-}
-
-/* Three-way comparison of the BY variables in the current input
-   cases of files A and B, using each file's own BY variable list
-   and MTF's BY variable count.  Returns -1 if A < B, 0 if A == B,
-   1 if A > B. */
-static inline int
-mtf_compare_BY_values (struct mtf_proc *mtf,
-                       struct mtf_file *a, struct mtf_file *b)
-{
-  int cmp = case_compare_2dict (a->input, b->input,
-                                a->by, b->by, mtf->by_cnt);
-
-  return cmp;
-}
-
-/* Perform one iteration of steps 3...7 above: build one output
- case from the input records currently held by MTF's files,
- append it to MTF's output casefile, and read the next record
- for the FILE inputs that supplied it.
- DS is passed to proc_read for inputs that are not read through
- any_reader_read.
- MTF->head is dereferenced unconditionally, so it must be
- non-null on entry.
- Returns true if successful, false if an I/O error occurred. */
-static bool
-mtf_processing (struct mtf_proc *mtf, struct dataset *ds)
-{
- struct mtf_file *min_head, *min_tail; /* Files with minimum BY values. */
- struct mtf_file *max_head, *max_tail; /* Files with non-minimum BYs. */
- struct mtf_file *iter, *next;
-
- /* 3. Find the FILE input record(s) that have minimum BY
- values. Store all the values from these input records into
- the output record.
-
- Both lists are linked through the files' next_min members and
- the minimum list starts out holding just MTF->head. The scan
- stops at the first entry whose type is not MTF_FILE (or at the
- end of the chain), leaving ITER there for step 4. */
- min_head = min_tail = mtf->head;
- max_head = max_tail = NULL;
- for (iter = mtf->head->next; iter && iter->type == MTF_FILE;
- iter = iter->next)
- {
- int cmp = mtf_compare_BY_values (mtf, min_head, iter);
- if (cmp < 0)
- {
- if (max_head)
- max_tail = max_tail->next_min = iter;
- else
- max_head = max_tail = iter;
- }
- else if (cmp == 0)
- min_tail = min_tail->next_min = iter;
- else /* cmp > 0: ITER compares below the current minimum, so the
- old minimum list is appended to the non-minimum list and ITER
- starts a new minimum list. */
- {
- if (max_head)
- {
- max_tail->next_min = min_head;
- max_tail = min_tail;
- }
- else
- {
- max_head = min_head;
- max_tail = min_tail;
- }
- min_head = min_tail = iter;
- }
- }
-
- /* 4. For every TABLE, read another record as long as the BY
- values on the TABLE's input record are less than the FILEs'
- BY values. If an exact match is found, store all the values
- from the TABLE input record into the output record.
-
- A TABLE whose read returns false is removed from the chain
- with mtf_delete_file_in_place; if that removal fails, this
- function returns false immediately. NEXT is saved before the
- inner loop because the removal frees the current entry. */
- for (; iter != NULL; iter = next)
- {
- assert (iter->type == MTF_TABLE);
-
- next = iter->next;
- for (;;)
- {
- int cmp = mtf_compare_BY_values (mtf, min_head, iter);
- if (cmp < 0)
- {
- if (max_head)
- max_tail = max_tail->next_min = iter;
- else
- max_head = max_tail = iter;
- }
- else if (cmp == 0)
- min_tail = min_tail->next_min = iter;
- else /* cmp > 0: the TABLE's record compares below the minimum;
- read its next record and compare again, or remove the TABLE
- if no record could be read. */
- {
- if (iter->handle
- ? any_reader_read (iter->reader, iter->input)
- : proc_read (ds, &iter->input))
- continue;
- if (!mtf_delete_file_in_place (mtf, &iter))
- return false;
- }
- break;
- }
- }
-
- /* Next sequence number.
- seq_nums[] holds, for the dictionary index of each variable
- returned by get_master, the sequence number at which that
- variable was last stored. Comparing it against seq_num below
- makes each such variable get written at most once per
- iteration. */
- mtf->seq_num++;
-
- /* Store data to all the records we are using.
- For each file on the minimum list, copy its input values into
- the output case and set its in_var, if any, to 1. */
- if (min_tail)
- min_tail->next_min = NULL;
- for (iter = min_head; iter; iter = iter->next_min)
- {
- int i;
-
- for (i = 0; i < dict_get_var_cnt (iter->dict); i++)
- {
- struct variable *v = dict_get_var (iter->dict, i);
- struct variable *mv = get_master (v);
- size_t mv_index = mv ? var_get_dict_index (mv) : 0; /* 0 is a placeholder that is never used: the test below short-circuits when MV is null. */
-
- if (mv != NULL && mtf->seq_nums[mv_index] != mtf->seq_num)
- {
- const struct ccase *record = iter->input;
- union value *out = case_data_rw (&mtf->mtf_case, mv);
-
- mtf->seq_nums[mv_index] = mtf->seq_num;
- if (var_is_numeric (v))
- out->f = case_num (record, v);
- else
- memcpy (out->s, case_str (record, v), var_get_width (v));
- }
- }
- if (iter->in_var != NULL)
- case_data_rw (&mtf->mtf_case, iter->in_var)->f = 1.;
- }
-
- /* Store missing values to all the records we're not using.
- Numeric variables get SYSMIS and string variables are filled
- with spaces. Variables already stored during this iteration
- are left alone because of the seq_nums check. Each such
- file's in_var, if any, is set to 0. */
- if (max_tail)
- max_tail->next_min = NULL;
- for (iter = max_head; iter; iter = iter->next_min)
- {
- int i;
-
- for (i = 0; i < dict_get_var_cnt (iter->dict); i++)
- {
- struct variable *v = dict_get_var (iter->dict, i);
- struct variable *mv = get_master (v);
- size_t mv_index = mv ? var_get_dict_index (mv) : 0; /* Placeholder 0 is never used when MV is null. */
-
- if (mv != NULL && mtf->seq_nums[mv_index] != mtf->seq_num)
- {
- union value *out = case_data_rw (&mtf->mtf_case, mv);
- mtf->seq_nums[mv_index] = mtf->seq_num;
-
- if (var_is_numeric (v))
- out->f = SYSMIS;
- else
- memset (out->s, ' ', var_get_width (v));
- }
- }
- if (iter->in_var != NULL)
- case_data_rw (&mtf->mtf_case, iter->in_var)->f = 0.;
- }
-
- /* 5. Write the output record. */
- casefile_append (mtf->output, &mtf->mtf_case);
-
- /* 6. Read another record from each input file FILE and TABLE
- that we stored values from above. If we come to the end of
- one of the input files, remove it from the list of input
- files.
-
- NOTE: the loop below walks the minimum list only while the
- entry's type is MTF_FILE, so it stops at the first other
- entry; TABLE entries on the minimum list are not advanced
- here.
- NOTE(review): this loop selects the read path by testing
- iter->reader, whereas the other reads in this function test
- iter->handle -- presumably equivalent; confirm. */
- for (iter = min_head; iter && iter->type == MTF_FILE; iter = next)
- {
- next = iter->next_min;
- if (iter->reader != NULL
- ? !any_reader_read (iter->reader, iter->input)
- : !proc_read (ds, &iter->input))
- if (!mtf_delete_file_in_place (mtf, &iter))
- return false;
- }
- return true;
-}
-
-/* Merge the dictionary for file F into master dictionary M. */
-static int
-mtf_merge_dictionary (struct dictionary *const m, struct mtf_file *f)
-{
- struct dictionary *d = f->dict;
- const char *d_docs, *m_docs;
- int i;
-
- if (dict_get_label (m) == NULL)
- dict_set_label (m, dict_get_label (d));
-
- d_docs = dict_get_documents (d);
- m_docs = dict_get_documents (m);
- if (d_docs != NULL)
- {
- if (m_docs == NULL)
- dict_set_documents (m, d_docs);
- else
- {
- char *new_docs;
- size_t new_len;
-
- new_len = strlen (m_docs) + strlen (d_docs);
- new_docs = xmalloc (new_len + 1);
- strcpy (new_docs, m_docs);
- strcat (new_docs, d_docs);
- dict_set_documents (m, new_docs);
- free (new_docs);
- }
- }
-
- for (i = 0; i < dict_get_var_cnt (d); i++)
- {
- struct variable *dv = dict_get_var (d, i);
- struct variable *mv = dict_lookup_var (m, var_get_name (dv));
-
- if (dict_class_from_id (var_get_name (dv)) == DC_SCRATCH)
- continue;
-
- if (mv != NULL)
- {
- if (var_get_width (mv) != var_get_width (dv))
- {
- msg (SE, _("Variable %s in file %s (%s) has different "
- "type or width from the same variable in "
- "earlier file (%s)."),
- var_get_name (dv), fh_get_name (f->handle),
- var_type_description (dv), var_type_description (mv));
- return 0;
- }
-
- if (var_get_width (dv) == var_get_width (mv))
- {
- if (var_has_value_labels (dv) && !var_has_value_labels (mv))
- var_set_value_labels (mv, var_get_value_labels (dv));
- if (var_has_missing_values (dv) && !var_has_missing_values (mv))
- var_set_missing_values (mv, var_get_missing_values (dv));
- }
-
- if (var_get_label (dv) && !var_get_label (mv))
- var_set_label (mv, var_get_label (dv));
- }
- else
- mv = dict_clone_var_assert (m, dv, var_get_name (dv));