- next = iter->next;
- assert (iter->dict != mtf->dict);
- if (!mtf_close_file (iter))
- ok = false;
- }
-
- if (mtf->dict)
- dict_destroy (mtf->dict);
- case_destroy (&mtf->mtf_case);
- free (mtf->seq_nums);
-
- return ok;
-}
-
-/* Unlinks *FILE from MTF's chain of input files and closes it.
-   MTF's head and tail pointers are adjusted when *FILE was at either
-   end of the chain.  On return *FILE points to the file that
-   followed the removed one, or is NULL if it was the last in the
-   chain.
-   In MTF's output case, the removed file's IN variable (if it has
-   one) is set to 0, and every variable of the file that has a master
-   variable is reset there: numeric ones to SYSMIS, string ones to
-   spaces.
-   Returns true if successful, false if an I/O error occurred. */
-static bool
-mtf_delete_file_in_place (struct mtf_proc *mtf, struct mtf_file **file)
-{
-  struct mtf_file *victim = *file;
-  int idx;
-
-  /* Splice VICTIM out of the doubly linked chain. */
-  if (victim->prev != NULL)
-    {
-      victim->prev->next = victim->next;
-    }
-  if (victim->next != NULL)
-    {
-      victim->next->prev = victim->prev;
-    }
-  if (mtf->head == victim)
-    {
-      mtf->head = victim->next;
-    }
-  if (mtf->tail == victim)
-    {
-      mtf->tail = victim->prev;
-    }
-  *file = victim->next;
-
-  /* The removed file no longer contributes to the output case. */
-  if (victim->in_var != NULL)
-    {
-      case_data_rw (&mtf->mtf_case, victim->in_var->fv)->f = 0.;
-    }
-  for (idx = 0; idx < dict_get_var_cnt (victim->dict); idx++)
-    {
-      struct variable *var = dict_get_var (victim->dict, idx);
-      struct variable *master = get_master (var);
-      union value *dst;
-
-      if (master == NULL)
-        continue;
-
-      dst = case_data_rw (&mtf->mtf_case, master->fv);
-      if (var->type != NUMERIC)
-        memset (dst->s, ' ', var->width);
-      else
-        dst->f = SYSMIS;
-    }
-
-  return mtf_close_file (victim);
-}
-
-/* Reads one record into every input file in the chain that has a
-   file handle; files without a handle are skipped.  A file whose
-   read returns false is removed from the chain with
-   mtf_delete_file_in_place().  MTF_ is the struct mtf_proc.
-   Returns true if successful, false if an I/O error occurred, in
-   which case the remaining files are not read. */
-static bool
-mtf_read_nonactive_records (void *mtf_)
-{
-  struct mtf_proc *mtf = mtf_;
-  struct mtf_file *file = mtf->head;
-
-  while (file != NULL)
-    {
-      /* Fetch the successor first: deleting FILE overwrites it. */
-      struct mtf_file *successor = file->next;
-
-      if (file->handle != NULL
-          && !any_reader_read (file->reader, &file->input)
-          && !mtf_delete_file_in_place (mtf, &file))
-        return false;
-
-      file = successor;
-    }
-  return true;
-}
-
-/* Compare the BY variables for files A and B; return -1 if A < B, 0
-   if A == B, 1 if A > B.
-   A and B must both be non-null.  For either file whose input case
-   is null, C is compared in its place.  MTF supplies the number of
-   BY variables to compare. */
-static inline int
-mtf_compare_BY_values (struct mtf_proc *mtf,
-                       struct mtf_file *a, struct mtf_file *b,
-                       struct ccase *c)
-{
-  struct ccase *ca, *cb;
-
-  /* Check the file pointers before their first dereference.  Both
-     are always dereferenced below, so neither may be null. */
-  assert (a != NULL && b != NULL);
-
-  ca = case_is_null (&a->input) ? c : &a->input;
-  cb = case_is_null (&b->input) ? c : &b->input;
-  return case_compare_2dict (ca, cb, a->by, b->by, mtf->by_cnt);
-}
-
-/* Perform one iteration of steps 3...7 above.
- MTF_ is the struct mtf_proc for the merge. C stands in for the
- input record of any file whose own input case is null.
- Returns at once, with success, when the head of the chain is a
- TABLE. Otherwise the steps repeat until another record is needed
- from the active file (a FILE with no handle contributed to the
- output record) or the head of the chain is no longer a FILE.
- Returns true if successful, false if an I/O error occurred. */
-static bool
-mtf_processing (struct ccase *c, void *mtf_)
-{
- struct mtf_proc *mtf = mtf_;
-
- /* Do we need another record from the active file? */
- bool read_active_file;
-
- assert (mtf->head != NULL);
- if (mtf->head->type == MTF_TABLE)
- return true; /* Head of the chain is a TABLE, not a FILE. */
-
- do
- {
- struct mtf_file *min_head, *min_tail; /* Files with minimum BY values. */
- struct mtf_file *max_head, *max_tail; /* Files with non-minimum BYs. */
- struct mtf_file *iter, *next;
-
- read_active_file = false;
-
- /* 3. Find the FILE input record(s) that have minimum BY
- values. Store all the values from these input records into
- the output record. */
- min_head = min_tail = mtf->head;
- max_head = max_tail = NULL;
- for (iter = mtf->head->next; iter && iter->type == MTF_FILE;
- iter = iter->next)
- {
- int cmp = mtf_compare_BY_values (mtf, min_head, iter, c);
- if (cmp < 0) /* min_head sorts first: ITER is non-minimum. */
- {
- if (max_head)
- max_tail = max_tail->next_min = iter;
- else
- max_head = max_tail = iter;
- }
- else if (cmp == 0) /* Tie: ITER joins the minimum list. */
- min_tail = min_tail->next_min = iter;
- else /* cmp > 0: ITER becomes the sole minimum and the old
-  minimum list is appended to the non-minimum list. */
- {
- if (max_head)
- {
- max_tail->next_min = min_head;
- max_tail = min_tail;
- }
- else
- {
- max_head = min_head;
- max_tail = min_tail;
- }
- min_head = min_tail = iter;
- }
- }
-
- /* 4. For every TABLE, read another record as long as the BY
- values on the TABLE's input record are less than the FILEs'
- BY values. If an exact match is found, store all the values
- from the TABLE input record into the output record. */
- for (; iter != NULL; iter = next) /* ITER starts at the first non-FILE entry. */
- {
- assert (iter->type == MTF_TABLE);
-
- next = iter->next; /* Saved because ITER may be deleted below. */
- for (;;)
- {
- int cmp = mtf_compare_BY_values (mtf, min_head, iter, c);
- if (cmp < 0) /* TABLE record is past the minimum: no match. */
- {
- if (max_head)
- max_tail = max_tail->next_min = iter;
- else
- max_head = max_tail = iter;
- }
- else if (cmp == 0) /* Exact match: TABLE contributes values. */
- min_tail = min_tail->next_min = iter;
- else /* cmp > 0: TABLE record is behind the minimum; advance it. */
- {
- if (iter->handle == NULL) /* NOTE(review): no handle to read from;
-  presumably this is the active file, whose next record comes
-  from the caller -- confirm. */
- return true;
- if (any_reader_read (iter->reader, &iter->input))
- continue; /* Compare again with the newly read record. */
- if (!mtf_delete_file_in_place (mtf, &iter)) /* any_reader_read
-  returned false: remove this TABLE from the chain. */
- return false;
- }
- break; /* Done with this TABLE. */
- }
- }
-
- /* Next sequence number. */
- mtf->seq_num++;
-
- /* Store data to all the records we are using. */
- if (min_tail) /* Terminate the minimum list. */
- min_tail->next_min = NULL;
- for (iter = min_head; iter; iter = iter->next_min)
- {
- int i;
-
- for (i = 0; i < dict_get_var_cnt (iter->dict); i++)
- {
- struct variable *v = dict_get_var (iter->dict, i);
- struct variable *mv = get_master (v);
-
- if (mv != NULL && mtf->seq_nums[mv->index] != mtf->seq_num)
- { /* Master variable not yet stored during this iteration. */
- struct ccase *record
- = case_is_null (&iter->input) ? c : &iter->input;
- union value *out = case_data_rw (&mtf->mtf_case, mv->fv);
-
- mtf->seq_nums[mv->index] = mtf->seq_num;
- if (v->type == NUMERIC)
- out->f = case_num (record, v->fv);
- else
- memcpy (out->s, case_str (record, v->fv), v->width);
- }
- }
- if (iter->in_var != NULL)
- case_data_rw (&mtf->mtf_case, iter->in_var->fv)->f = 1.;
-
- if (iter->type == MTF_FILE && iter->handle == NULL)
- read_active_file = true; /* A FILE without a handle was used. */
- }
-
- /* Store missing values to all the records we're not
- using. */
- if (max_tail) /* Terminate the non-minimum list. */
- max_tail->next_min = NULL;
- for (iter = max_head; iter; iter = iter->next_min)
- {
- int i;
-
- for (i = 0; i < dict_get_var_cnt (iter->dict); i++)
- {
- struct variable *v = dict_get_var (iter->dict, i);
- struct variable *mv = get_master (v);
-
- if (mv != NULL && mtf->seq_nums[mv->index] != mtf->seq_num)
- { /* Only if no record above already supplied this variable. */
- union value *out = case_data_rw (&mtf->mtf_case, mv->fv);
- mtf->seq_nums[mv->index] = mtf->seq_num;
-
- if (v->type == NUMERIC)
- out->f = SYSMIS;
- else
- memset (out->s, ' ', v->width);
- }
- }
- if (iter->in_var != NULL)
- case_data_rw (&mtf->mtf_case, iter->in_var->fv)->f = 0.;
- }
-
- /* 5. Write the output record. */
- mtf->sink->class->write (mtf->sink, &mtf->mtf_case);
-
- /* 6. Read another record from each input file FILE and TABLE
- that we stored values from above. If we come to the end of
- one of the input files, remove it from the list of input
- files.
- NOTE(review): the loop below stops at the first entry of the
- minimum list that is not a FILE, so only FILEs are advanced
- here. */
- for (iter = min_head; iter && iter->type == MTF_FILE; iter = next)
- {
- next = iter->next_min; /* Saved before ITER can be deleted. */
- if (iter->reader != NULL
- && !any_reader_read (iter->reader, &iter->input))
- if (!mtf_delete_file_in_place (mtf, &iter))
- return false;
- }
- }
- while (!read_active_file
- && mtf->head != NULL && mtf->head->type == MTF_FILE);
-
- return true;
-}
-
-/* Merge the dictionary for file F into master dictionary M. */
-static int
-mtf_merge_dictionary (struct dictionary *const m, struct mtf_file *f)
-{
- struct dictionary *d = f->dict;
- const char *d_docs, *m_docs;
- int i;
-
- if (dict_get_label (m) == NULL)
- dict_set_label (m, dict_get_label (d));
-
- d_docs = dict_get_documents (d);
- m_docs = dict_get_documents (m);
- if (d_docs != NULL)
- {
- if (m_docs == NULL)
- dict_set_documents (m, d_docs);
- else
- {
- char *new_docs;
- size_t new_len;
-
- new_len = strlen (m_docs) + strlen (d_docs);
- new_docs = xmalloc (new_len + 1);
- strcpy (new_docs, m_docs);
- strcat (new_docs, d_docs);
- dict_set_documents (m, new_docs);
- free (new_docs);
- }
- }
-
- for (i = 0; i < dict_get_var_cnt (d); i++)
- {
- struct variable *dv = dict_get_var (d, i);
- struct variable *mv = dict_lookup_var (m, dv->name);
-
- if (dict_class_from_id (dv->name) == DC_SCRATCH)
- continue;
-
- if (mv != NULL)
- {
- if (mv->width != dv->width)
- {
- msg (SE, _("Variable %s in file %s (%s) has different "
- "type or width from the same variable in "
- "earlier file (%s)."),
- dv->name, fh_get_name (f->handle),
- var_type_description (dv), var_type_description (mv));
- return 0;
- }
-
- if (dv->width == mv->width)
- {
- if (val_labs_count (dv->val_labs)
- && !val_labs_count (mv->val_labs))
- mv->val_labs = val_labs_copy (dv->val_labs);
- if (!mv_is_empty (&dv->miss) && mv_is_empty (&mv->miss))
- mv_copy (&mv->miss, &dv->miss);
- }
-
- if (dv->label && !mv->label)
- mv->label = xstrdup (dv->label);
- }
- else
- mv = dict_clone_var_assert (m, dv, dv->name);