-
-/* Optionally creates a flag variable in DICT.
-
-   If VAR_NAME is a nonnull pointer to a non-empty string,
-   attempts to create a variable named VAR_NAME in DICT, sets
-   both of its formats to F1.0, and stores a pointer to the new
-   variable in *VAR.  If VAR_NAME is null or empty, stores a
-   null pointer in *VAR and does nothing else.
-
-   SUBCOMMAND is used only to identify the offending subcommand
-   in the error message.
-
-   Returns true if successful (including the do-nothing case),
-   false if the variable could not be created, in which case a
-   message saying that the name specified on SUBCOMMAND
-   duplicates an existing variable name is emitted and *VAR is
-   left null. */
-static bool
-create_flag_var (const char *subcommand, const char *var_name,
-                 struct dictionary *dict, struct variable **var)
-{
-  struct fmt_spec format;
-
-  if (var_name == NULL || var_name[0] == '\0')
-    {
-      *var = NULL;
-      return true;
-    }
-
-  format = fmt_for_output (FMT_F, 1, 0);
-  *var = dict_create_var (dict, var_name, 0);
-  if (*var == NULL)
-    {
-      /* The format string names the variable first and the
-         subcommand second, so the arguments must be supplied
-         in that order. */
-      msg (SE, _("Variable name %s specified on %s subcommand "
-                 "duplicates an existing variable name."),
-           var_name, subcommand);
-      return false;
-    }
-
-  var_set_both_formats (*var, &format);
-  return true;
-}
-
-/* Describes V's variable type and, for a string variable, its
-   width.  The description is returned in a newly allocated
-   string, which the caller must free. */
-static char *
-var_type_description (struct variable *v)
-{
-  char *description;
-
-  if (!var_is_numeric (v))
-    description = xasprintf ("string with width %d", var_get_width (v));
-  else
-    description = xstrdup ("numeric");
-
-  return description;
-}
-
-/* Closes all the files in MTF and frees their associated data,
-   removing each one from MTF's list of files as it goes.
-
-   Returns true if successful, false if an I/O error occurred on
-   any of the files.  Every file is released even if an earlier
-   one reported an error. */
-static bool
-mtf_close_all_files (struct mtf_proc *mtf)
-{
-  struct mtf_file *file;
-  bool ok = true;
-
-  ll_for_each_preremove (file, struct mtf_file, ll, &mtf->files)
-    {
-      /* casereader_destroy() reports whether the reader ran
-         into an I/O error.  Remember a failure, but keep going
-         so that nothing is leaked. */
-      if (!casereader_destroy (file->reader))
-        ok = false;
-      free (file->by);
-      dict_destroy (file->dict);
-      free (file->in_name);
-      case_destroy (&file->input);
-      free (file->vars);
-      free (file);
-    }
-
-  return ok;
-}
-
-/* Releases everything owned by MTF for the MATCH FILES
-   procedure: the input files, the master dictionary, the
-   output writer, and the two saved cases.  The result of
-   closing the input files is ignored.  MTF itself is not
-   freed. */
-static void
-mtf_free (struct mtf_proc *mtf)
-{
-  /* Input side. */
-  mtf_close_all_files (mtf);
-
-  /* Output side. */
-  dict_destroy (mtf->dict);
-  casewriter_destroy (mtf->output);
-
-  /* Cases kept around for FIRST and LAST processing. */
-  case_destroy (&mtf->buffered_case);
-  case_destroy (&mtf->prev_BY_case);
-}
-
-/* Discards FILE's current input case and tries to read the next
-   one from FILE's reader.
-
-   Returns true if a case was read.  Otherwise, decrements MTF's
-   count of nonempty files and returns false. */
-static bool
-mtf_read_record (struct mtf_proc *mtf, struct mtf_file *file)
-{
-  case_destroy (&file->input);
-
-  if (casereader_read (file->reader, &file->input))
-    return true;
-
-  /* Nothing more to read from this file. */
-  mtf->nonempty_files--;
-  return false;
-}
-
-/* Compares the BY variables in the current input cases of files
-   A and B.  Returns -1 if A < B, 0 if A == B, 1 if A > B.  (If
-   there are no BY variables, then all records compare equal.) */
-static inline int
-mtf_compare_BY_values (struct mtf_proc *mtf,
-                       struct mtf_file *a, struct mtf_file *b)
-{
-  int cmp = case_compare_2dict (&a->input, &b->input,
-                                a->by, b->by, mtf->by_cnt);
-  return cmp;
-}
-
-/* Processes input files and write one case to the output file.
-
- Chooses the FILE input cases with the minimum BY values (plus
- any TABLE cases whose BY values match them), combines them
- into a single output case, and writes it to MTF's output,
- either immediately or, when FIRST or LAST variables are in
- use, one call late by way of MTF's buffered case. Finally
- reads the next record from each FILE that contributed.
-
- NOTE(review): "min" is dereferenced without a null check in
- the FIRST/LAST branch and is asserted nonnull for TABLEs, so
- presumably callers guarantee that at least one FILE still has
- a current case -- confirm against the caller.
-
- NOTE(review): when FIRST or LAST is in use, the case prepared
- by the final call is left in MTF's buffered case and is not
- written here; presumably it is flushed elsewhere -- confirm. */
-static void
-mtf_process_case (struct mtf_proc *mtf)
-{
- struct ccase c;
- struct mtf_file *min;
- struct mtf_file *file;
- int min_sequence;
- size_t i;
-
- /* Find the set of one or more FILEs whose BY values are
- minimal, as well as the set of zero or more TABLEs whose BY
- values equal those of the minimum FILEs.
-
- After each iteration of the loop, this invariant holds: the
- FILEs with minimum BY values thus far have "sequence"
- members equal to min_sequence, and "min" points to one of
- the mtf_files whose case has those minimum BY values, and
- similarly for TABLEs.
-
- A "sequence" of -1 marks a file that takes no part in this
- output case. Whenever a FILE with strictly smaller BY values
- turns up, min_sequence is incremented, which implicitly
- excludes every file tagged with an earlier sequence number.
- The first FILE that has a case always becomes "min".
-
- For a TABLE, records whose BY values are below the minimum
- are read past until one matches or exceeds the minimum or
- the TABLE runs out of records. */
- min_sequence = 0;
- min = NULL;
- ll_for_each (file, struct mtf_file, ll, &mtf->files)
- if (case_is_null (&file->input))
- file->sequence = -1;
- else if (file->type == MTF_FILE)
- {
- int cmp = min != NULL ? mtf_compare_BY_values (mtf, min, file) : 1;
- if (cmp <= 0)
- file->sequence = cmp < 0 ? -1 : min_sequence;
- else
- {
- file->sequence = ++min_sequence;
- min = file;
- }
- }
- else
- {
- int cmp;
- assert (min != NULL);
- do
- {
- cmp = mtf_compare_BY_values (mtf, min, file);
- }
- while (cmp > 0 && mtf_read_record (mtf, file));
- file->sequence = cmp == 0 ? min_sequence : -1;
- }
-
- /* Form the output case from the input cases.
-
- Every variable in the output case starts out missing. The
- participating files are then visited in reverse list order,
- so where more than one of them supplies the same output
- variable, the value from the file earliest in the list is
- the one that remains. Each file's IN variable, if it has
- one, records whether that file took part in this case. */
- case_create (&c, dict_get_next_value_idx (mtf->dict));
- for (i = 0; i < dict_get_var_cnt (mtf->dict); i++)
- {
- struct variable *v = dict_get_var (mtf->dict, i);
- value_set_missing (case_data_rw (&c, v), var_get_width (v));
- }
- ll_for_each_reverse (file, struct mtf_file, ll, &mtf->files)
- {
- bool include_file = file->sequence == min_sequence;
- if (include_file)
- for (i = 0; i < file->var_cnt; i++)
- {
- const struct mtf_variable *mv = &file->vars[i];
- const union value *in = case_data (&file->input, mv->in_var);
- union value *out = case_data_rw (&c, mv->out_var);
- value_copy (out, in, var_get_width (mv->in_var));
- }
- if (file->in_var != NULL)
- case_data_rw (&c, file->in_var)->f = include_file;
- }
-
- /* Write the output case. */
- if (mtf->first == NULL && mtf->last == NULL)
- {
- /* With no FIRST or LAST variables, it's trivial. */
- casewriter_write (mtf->output, &c);
- }
- else
- {
- /* It's harder with LAST, because we can't know whether
- this case is the last in a group until we've prepared
- the *next* case also. Thus, we buffer the previous
- output case until the next one is ready.
-
- We also have to save a copy of one of the previous input
- cases, so that we can compare the BY variables. We
- can't compare the BY variables between the current
- output case and the saved one because the BY variables
- might not be in the output (the user is allowed to drop
- them).
-
- new_BY is true when this case's BY values differ from
- those of the saved input case, or when no case has been
- saved yet. It is stored as LAST in the buffered case
- that is about to be written, and as FIRST in the case
- that is being buffered in its place. */
- bool new_BY;
- if (mtf->prev_BY != NULL)
- {
- new_BY = case_compare_2dict (&min->input, &mtf->prev_BY_case,
- min->by, mtf->prev_BY,
- mtf->by_cnt);
- if (mtf->last != NULL)
- case_data_rw (&mtf->buffered_case, mtf->last)->f = new_BY;
- casewriter_write (mtf->output, &mtf->buffered_case);
- }
- else
- new_BY = true;
-
- case_move (&mtf->buffered_case, &c);
- if (mtf->first != NULL)
- case_data_rw (&mtf->buffered_case, mtf->first)->f = new_BY;
-
- if (new_BY)
- {
- mtf->prev_BY = min->by;
- case_destroy (&mtf->prev_BY_case);
- case_clone (&mtf->prev_BY_case, &min->input);
- }
- }
-
- /* Read another record from each input file FILE with minimum
- values.
-
- The loop stops at the first entry that is not a FILE, so it
- relies on every FILE preceding every TABLE in MTF's list --
- TODO confirm where the list is built. */
- ll_for_each (file, struct mtf_file, ll, &mtf->files)
- if (file->type == MTF_FILE)
- {
- if (file->sequence == min_sequence)
- mtf_read_record (mtf, file);
- }
- else
- break;
-}
-
-/* Merges the dictionary for file F into master dictionary M.
-
-   M takes on F's file label if it has none of its own, and F's
-   documents, if any, are appended to M's.  Then each of F's
-   variables, other than scratch variables, is merged into M:
-
-   - A variable whose name is not yet in M is cloned into M.
-
-   - A variable whose name is already in M must have the same
-     width as the variable in M.  In that case its value labels,
-     missing values, and variable label are copied to M's
-     variable wherever M's variable lacks them.
-
-   Returns true if successful.  If a variable in F has a width
-   different from the variable of the same name in M, emits an
-   error message and returns false, leaving in M whatever was
-   merged up to that point. */
-static bool
-mtf_merge_dictionary (struct dictionary *const m, struct mtf_file *f)
-{
-  struct dictionary *d = f->dict;
-  const char *d_docs, *m_docs;
-  size_t i;
-
-  if (dict_get_label (m) == NULL)
-    dict_set_label (m, dict_get_label (d));
-
-  d_docs = dict_get_documents (d);
-  m_docs = dict_get_documents (m);
-  if (d_docs != NULL)
-    {
-      if (m_docs == NULL)
-        dict_set_documents (m, d_docs);
-      else
-        {
-          char *new_docs = xasprintf ("%s%s", m_docs, d_docs);
-          dict_set_documents (m, new_docs);
-          free (new_docs);
-        }
-    }
-
-  for (i = 0; i < dict_get_var_cnt (d); i++)
-    {
-      struct variable *dv = dict_get_var (d, i);
-      struct variable *mv;
-
-      /* Scratch variables never make it into the master
-         dictionary. */
-      if (dict_class_from_id (var_get_name (dv)) == DC_SCRATCH)
-        continue;
-
-      mv = dict_lookup_var (m, var_get_name (dv));
-      if (mv == NULL)
-        {
-          /* First appearance of this variable name. */
-          dict_clone_var_assert (m, dv, var_get_name (dv));
-          continue;
-        }
-
-      if (var_get_width (mv) != var_get_width (dv))
-        {
-          char *dv_description = var_type_description (dv);
-          char *mv_description = var_type_description (mv);
-          msg (SE, _("Variable %s in file %s (%s) has different "
-                     "type or width from the same variable in "
-                     "earlier file (%s)."),
-               var_get_name (dv), fh_get_name (f->handle),
-               dv_description, mv_description);
-          free (dv_description);
-          free (mv_description);
-          return false;
-        }
-
-      /* The widths are known to match from here on, so the
-         attributes of DV can be applied to MV wherever MV has
-         none of its own. */
-      if (var_has_value_labels (dv) && !var_has_value_labels (mv))
-        var_set_value_labels (mv, var_get_value_labels (dv));
-      if (var_has_missing_values (dv) && !var_has_missing_values (mv))
-        var_set_missing_values (mv, var_get_missing_values (dv));
-      if (var_get_label (dv) && !var_get_label (mv))
-        var_set_label (mv, var_get_label (dv));
-    }
-
-  return true;
-}
-\f
-/* Case map.
-
- A case map copies data from a case that corresponds to one
- dictionary to a case that corresponds to a second dictionary
- derived from the first by, optionally, deleting, reordering,
- or renaming variables. (No new variables may be created.)
- */
-
-/* A case map, as produced by finish_case_map(), applied by
- map_case(), and freed by destroy_case_map(). */
-struct case_map
- {
- size_t value_cnt; /* Number of values in map. */
- int *map; /* For each destination index, the
- corresponding source index, or -1 if
- nothing is copied to that index. */
- };
-
-/* Prepares dictionary D for producing a case map by recording
-   each variable's current case index in that variable's aux
-   data.  Afterward, the caller may delete, reorder, or rename
-   variables within D at will before calling finish_case_map()
-   to produce the case map.
-
-   Uses D's aux members, which must otherwise not be in use. */
-static void
-start_case_map (struct dictionary *d)
-{
-  const size_t n_vars = dict_get_var_cnt (d);
-  size_t idx;
-
-  for (idx = 0; idx < n_vars; idx++)
-    {
-      struct variable *var = dict_get_var (d, idx);
-      int *orig_index = xmalloc (sizeof *orig_index);
-
-      /* Remember where this variable's data lives right now. */
-      *orig_index = var_get_case_index (var);
-      var_attach_aux (var, orig_index, var_dtor_free);
-    }
-}
-
-/* Builds a case map from dictionary D, which must have been
-   previously prepared with start_case_map().
-
-   Detaches and frees the aux data that start_case_map()
-   attached to D's variables, and keeps no reference to D.
-
-   Returns the new case map, or a null pointer if no mapping is
-   required (that is, no variable's data has changed position).
-   A non-null result should eventually be passed to
-   destroy_case_map(). */
-static struct case_map *
-finish_case_map (struct dictionary *d)
-{
-  const size_t n_vars = dict_get_var_cnt (d);
-  struct case_map *result;
-  bool moved = false;
-  size_t i;
-
-  /* Start with a map in which no destination has a source. */
-  result = xmalloc (sizeof *result);
-  result->value_cnt = dict_get_next_value_idx (d);
-  result->map = xnmalloc (result->value_cnt, sizeof *result->map);
-  for (i = 0; i < result->value_cnt; i++)
-    result->map[i] = -1;
-
-  /* Fill in the source index for every value of every
-     variable still in D, noting whether anything moved. */
-  for (i = 0; i < n_vars; i++)
-    {
-      struct variable *var = dict_get_var (d, i);
-      size_t n_values = var_get_value_cnt (var);
-      int *orig_index = (int *) var_detach_aux (var);
-      size_t ofs;
-
-      if (var_get_case_index (var) != *orig_index)
-        moved = true;
-
-      for (ofs = 0; ofs < n_values; ofs++)
-        {
-          int from = *orig_index + ofs;
-          int to = var_get_case_index (var) + ofs;
-
-          assert (result->map[to] == -1);
-          result->map[to] = from;
-        }
-      free (orig_index);
-    }
-
-  if (!moved)
-    {
-      destroy_case_map (result);
-      return NULL;
-    }
-
-  /* Drop trailing destinations that have no source. */
-  while (result->value_cnt > 0 && result->map[result->value_cnt - 1] == -1)
-    result->value_cnt--;
-
-  return result;
-}
-
-/* Copies values from SRC into DST as directed by case map MAP.
-   Destination values for which MAP has no source (marked -1)
-   are left untouched. */
-static void
-map_case (const struct case_map *map,
-          const struct ccase *src, struct ccase *dst)
-{
-  size_t i;
-
-  for (i = 0; i < map->value_cnt; i++)
-    {
-      const int from = map->map[i];
-
-      if (from == -1)
-        continue;
-      *case_data_rw_idx (dst, i) = *case_data_idx (src, from);
-    }
-}
-
-/* Frees case map MAP along with its index array.  Does nothing
-   if MAP is a null pointer. */
-static void
-destroy_case_map (struct case_map *map)
-{
-  if (map == NULL)
-    return;
-
-  free (map->map);
-  free (map);
-}