/* PSPP - a program for statistical analysis.
- Copyright (C) 1997-9, 2000, 2006, 2007, 2008, 2009 Free Software Foundation, Inc.
+ Copyright (C) 1997-9, 2000, 2006, 2007, 2008, 2009, 2010, 2011, 2012, 2013, 2014 Free Software Foundation, Inc.
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
#include <stdlib.h>
-#include <data/any-reader.h>
-#include <data/case-matcher.h>
-#include <data/case.h>
-#include <data/casereader.h>
-#include <data/casewriter.h>
-#include <data/dictionary.h>
-#include <data/format.h>
-#include <data/procedure.h>
-#include <data/subcase.h>
-#include <data/variable.h>
-#include <language/command.h>
-#include <language/data-io/file-handle.h>
-#include <language/data-io/trim.h>
-#include <language/lexer/lexer.h>
-#include <language/lexer/variable-parser.h>
-#include <language/stats/sort-criteria.h>
-#include <libpspp/assertion.h>
-#include <libpspp/message.h>
-#include <libpspp/taint.h>
-#include <math/sort.h>
-
-#include "xalloc.h"
+#include "data/any-reader.h"
+#include "data/case-matcher.h"
+#include "data/case.h"
+#include "data/casereader.h"
+#include "data/casewriter.h"
+#include "data/dataset.h"
+#include "data/dictionary.h"
+#include "data/format.h"
+#include "data/subcase.h"
+#include "data/variable.h"
+#include "language/command.h"
+#include "language/data-io/file-handle.h"
+#include "language/data-io/trim.h"
+#include "language/lexer/lexer.h"
+#include "language/lexer/variable-parser.h"
+#include "language/stats/sort-criteria.h"
+#include "libpspp/assertion.h"
+#include "libpspp/i18n.h"
+#include "libpspp/message.h"
+#include "libpspp/string-array.h"
+#include "libpspp/taint.h"
+#include "math/sort.h"
+
+#include "gl/minmax.h"
+#include "gl/xalloc.h"
#include "gettext.h"
#define _(msgid) gettext (msgid)
{
/* Basics. */
enum comb_file_type type; /* COMB_FILE or COMB_TABLE. */
+ int start_ofs, end_ofs; /* Lexer offsets. */
/* Variables. */
struct subcase by_vars; /* BY variables in this input file. */
struct subcase src, dst; /* Data to copy to output; where to put it. */
+ const struct missing_values **mv; /* Each variable's missing values. */
/* Input files. */
struct file_handle *handle; /* Input file handle. */
bool is_sorted; /* Is file presorted on the BY variables? */
/* IN subcommand. */
- char in_name[VAR_NAME_LEN + 1];
+ char *in_name;
+ int in_ofs;
struct variable *in_var;
};
struct subcase by_vars; /* BY variables in the output. */
struct casewriter *output; /* Destination for output. */
+ size_t *var_sources;
+ size_t n_var_sources, allocated_var_sources;
+
struct case_matcher *matcher;
/* FIRST, LAST.
static void free_comb_proc (struct comb_proc *);
static void close_all_comb_files (struct comb_proc *);
-static bool merge_dictionary (struct dictionary *const, struct comb_file *);
+static bool merge_dictionary (struct comb_proc *, struct lexer *,
+ struct comb_file *);
static void execute_update (struct comb_proc *);
static void execute_match_files (struct comb_proc *);
static void execute_add_files (struct comb_proc *);
-static bool create_flag_var (const char *subcommand_name, const char *var_name,
+static bool create_flag_var (struct lexer *lexer, const char *subcommand_name,
+ const char *var_name, int var_ofs,
struct dictionary *, struct variable **);
static void output_case (struct comb_proc *, struct ccase *, union value *by);
static void output_buffered_case (struct comb_proc *);
combine_files (enum comb_command_type command,
struct lexer *lexer, struct dataset *ds)
{
- struct comb_proc proc;
+ struct comb_proc proc = {
+ .dict = dict_create (get_default_encoding ()),
+ };
bool saw_by = false;
bool saw_sort = false;
struct casereader *active_file = NULL;
- char first_name[VAR_NAME_LEN + 1] = "";
- char last_name[VAR_NAME_LEN + 1] = "";
+ char *first_name = NULL;
+ int first_ofs = 0;
+ char *last_name = NULL;
+ int last_ofs = 0;
struct taint *taint = NULL;
- size_t n_tables = 0;
+ size_t table_idx = SIZE_MAX;
+ int sort_ofs = INT_MAX;
size_t allocated_files = 0;
- size_t i;
-
- proc.files = NULL;
- proc.n_files = 0;
- proc.dict = dict_create ();
- proc.output = NULL;
- proc.matcher = NULL;
- subcase_init_empty (&proc.by_vars);
- proc.first = NULL;
- proc.last = NULL;
- proc.buffered_case = NULL;
- proc.prev_BY = NULL;
-
dict_set_case_limit (proc.dict, dict_get_case_limit (dataset_dict (ds)));
- lex_match (lexer, '/');
+ lex_match (lexer, T_SLASH);
for (;;)
{
- struct comb_file *file;
+ int start_ofs = lex_ofs (lexer);
enum comb_file_type type;
-
if (lex_match_id (lexer, "FILE"))
type = COMB_FILE;
else if (command == COMB_MATCH && lex_match_id (lexer, "TABLE"))
{
type = COMB_TABLE;
- n_tables++;
+ table_idx = MIN (table_idx, proc.n_files);
}
else
break;
- lex_match (lexer, '=');
+ lex_match (lexer, T_EQUALS);
if (proc.n_files >= allocated_files)
proc.files = x2nrealloc (proc.files, &allocated_files,
sizeof *proc.files);
- file = &proc.files[proc.n_files++];
- file->type = type;
- subcase_init_empty (&file->by_vars);
- subcase_init_empty (&file->src);
- subcase_init_empty (&file->dst);
- file->handle = NULL;
- file->dict = NULL;
- file->reader = NULL;
- file->data = NULL;
- file->is_sorted = true;
- file->in_name[0] = '\0';
- file->in_var = NULL;
-
- if (lex_match (lexer, '*'))
+ struct comb_file *file = &proc.files[proc.n_files++];
+ *file = (struct comb_file) {
+ .type = type,
+ .start_ofs = start_ofs,
+ .is_sorted = true,
+ };
+
+ if (lex_match (lexer, T_ASTERISK))
{
- if (!proc_has_active_file (ds))
+ if (!dataset_has_source (ds))
{
- msg (SE, _("Cannot specify the active file since no active "
- "file has been defined."));
+ lex_next_error (lexer, -1, -1,
+ _("Cannot specify the active dataset since none "
+ "has been defined."));
goto error;
}
if (proc_make_temporary_transformations_permanent (ds))
- msg (SE, _("This command may not be used after TEMPORARY when "
- "the active file is an input source. "
- "Temporary transformations will be made permanent."));
+ lex_next_error (lexer, -1, -1,
+ _("This command may not be used after TEMPORARY "
+ "when the active dataset is an input source. "
+ "Temporary transformations will be made "
+ "permanent."));
file->dict = dict_clone (dataset_dict (ds));
}
else
{
- file->handle = fh_parse (lexer, FH_REF_FILE | FH_REF_SCRATCH);
+ file->handle = fh_parse (lexer, FH_REF_FILE, dataset_session (ds));
if (file->handle == NULL)
goto error;
- file->reader = any_reader_open (file->handle, &file->dict);
+ file->reader = any_reader_open_and_decode (file->handle, NULL,
+ &file->dict, NULL);
if (file->reader == NULL)
goto error;
}
+ file->end_ofs = lex_ofs (lexer) - 1;
- while (lex_match (lexer, '/'))
+ while (lex_match (lexer, T_SLASH))
if (lex_match_id (lexer, "RENAME"))
{
- if (!parse_dict_rename (lexer, file->dict))
+ if (!parse_dict_rename (lexer, file->dict, false))
goto error;
}
else if (lex_match_id (lexer, "IN"))
{
- lex_match (lexer, '=');
- if (lex_token (lexer) != T_ID)
- {
- lex_error (lexer, NULL);
- goto error;
- }
+ lex_match (lexer, T_EQUALS);
+ if (!lex_force_id (lexer))
+ goto error;
- if (file->in_name[0])
+ if (file->in_name)
{
- msg (SE, _("Multiple IN subcommands for a single FILE or "
- "TABLE."));
+ lex_error (lexer, _("Multiple IN subcommands for a single FILE "
+ "or TABLE."));
goto error;
}
- strcpy (file->in_name, lex_tokid (lexer));
+ file->in_name = xstrdup (lex_tokcstr (lexer));
+ file->in_ofs = lex_ofs (lexer);
lex_get (lexer);
}
else if (lex_match_id (lexer, "SORT"))
{
file->is_sorted = false;
saw_sort = true;
+ sort_ofs = MIN (sort_ofs, lex_ofs (lexer) - 1);
}
- merge_dictionary (proc.dict, file);
+ if (!merge_dictionary (&proc, lexer, file))
+ goto error;
}
- while (lex_token (lexer) != '.')
+ while (lex_token (lexer) != T_ENDCMD)
{
if (lex_match (lexer, T_BY))
{
- const struct variable **by_vars;
- size_t i;
- bool ok;
-
- if (saw_by)
+ if (saw_by)
{
- lex_sbc_only_once ("BY");
+ lex_sbc_only_once (lexer, "BY");
goto error;
}
saw_by = true;
- lex_match (lexer, '=');
+ lex_match (lexer, T_EQUALS);
+
+ const struct variable **by_vars;
if (!parse_sort_criteria (lexer, proc.dict, &proc.by_vars,
&by_vars, NULL))
goto error;
- ok = true;
- for (i = 0; i < proc.n_files; i++)
+ bool ok = true;
+ for (size_t i = 0; i < proc.n_files; i++)
{
struct comb_file *file = &proc.files[i];
- size_t j;
-
- for (j = 0; j < subcase_get_n_values (&proc.by_vars); j++)
+ for (size_t j = 0; j < subcase_get_n_fields (&proc.by_vars); j++)
{
const char *name = var_get_name (by_vars[j]);
struct variable *var = dict_lookup_var (file->dict, name);
subcase_get_direction (&proc.by_vars, j));
else
{
- if (file->handle != NULL)
- msg (SE, _("File %s lacks BY variable %s."),
- fh_get_name (file->handle), name);
- else
- msg (SE, _("Active file lacks BY variable %s."), name);
+ const char *fn
+ = file->handle ? fh_get_name (file->handle) : "*";
+ lex_ofs_error (lexer, file->start_ofs, file->end_ofs,
+ _("File %s lacks BY variable %s."),
+ fn, name);
ok = false;
}
}
}
else if (command != COMB_UPDATE && lex_match_id (lexer, "FIRST"))
{
- if (first_name[0] != '\0')
+ if (first_name != NULL)
{
- lex_sbc_only_once ("FIRST");
+ lex_sbc_only_once (lexer, "FIRST");
goto error;
}
- lex_match (lexer, '=');
+ lex_match (lexer, T_EQUALS);
if (!lex_force_id (lexer))
goto error;
- strcpy (first_name, lex_tokid (lexer));
+ first_name = xstrdup (lex_tokcstr (lexer));
+ first_ofs = lex_ofs (lexer);
lex_get (lexer);
}
else if (command != COMB_UPDATE && lex_match_id (lexer, "LAST"))
{
- if (last_name[0] != '\0')
+ if (last_name != NULL)
{
- lex_sbc_only_once ("LAST");
+ lex_sbc_only_once (lexer, "LAST");
goto error;
}
- lex_match (lexer, '=');
+ lex_match (lexer, T_EQUALS);
if (!lex_force_id (lexer))
goto error;
- strcpy (last_name, lex_tokid (lexer));
+ last_name = xstrdup (lex_tokcstr (lexer));
+ last_ofs = lex_ofs (lexer);
lex_get (lexer);
}
else if (lex_match_id (lexer, "MAP"))
goto error;
}
- if (!lex_match (lexer, '/') && lex_token (lexer) != '.')
+ if (!lex_match (lexer, T_SLASH) && lex_token (lexer) != T_ENDCMD)
{
lex_end_of_command (lexer);
goto error;
{
if (command == COMB_UPDATE)
{
- msg (SE, _("The BY subcommand is required."));
+ lex_sbc_missing (lexer, "BY");
goto error;
}
- if (n_tables)
+ if (table_idx != SIZE_MAX)
{
- msg (SE, _("BY is required when TABLE is specified."));
+ const struct comb_file *table = &proc.files[table_idx];
+ lex_ofs_error (lexer, table->start_ofs, table->end_ofs,
+ _("BY is required when %s is specified."), "TABLE");
goto error;
}
if (saw_sort)
{
- msg (SE, _("BY is required when SORT is specified."));
+ lex_ofs_error (lexer, sort_ofs, sort_ofs,
+ _("BY is required when %s is specified."), "SORT");
goto error;
}
}
/* Add IN, FIRST, and LAST variables to master dictionary. */
- for (i = 0; i < proc.n_files; i++)
+ for (size_t i = 0; i < proc.n_files; i++)
{
struct comb_file *file = &proc.files[i];
- if (!create_flag_var ("IN", file->in_name, proc.dict, &file->in_var))
+ if (!create_flag_var (lexer, "IN", file->in_name, file->in_ofs,
+ proc.dict, &file->in_var))
goto error;
}
- if (!create_flag_var ("FIRST", first_name, proc.dict, &proc.first)
- || !create_flag_var ("LAST", last_name, proc.dict, &proc.last))
+ if (!create_flag_var (lexer, "FIRST", first_name, first_ofs, proc.dict, &proc.first)
+ || !create_flag_var (lexer, "LAST", last_name, last_ofs, proc.dict, &proc.last))
goto error;
dict_delete_scratch_vars (proc.dict);
/* Set up mapping from each file's variables to master
variables. */
- for (i = 0; i < proc.n_files; i++)
+ for (size_t i = 0; i < proc.n_files; i++)
{
struct comb_file *file = &proc.files[i];
- size_t src_var_cnt = dict_get_var_cnt (file->dict);
- size_t j;
+ size_t src_n_vars = dict_get_n_vars (file->dict);
- for (j = 0; j < src_var_cnt; j++)
+ file->mv = xnmalloc (src_n_vars, sizeof *file->mv);
+ for (size_t j = 0; j < src_n_vars; j++)
{
struct variable *src_var = dict_get_var (file->dict, j);
struct variable *dst_var = dict_lookup_var (proc.dict,
var_get_name (src_var));
if (dst_var != NULL)
{
+ size_t n = subcase_get_n_fields (&file->src);
+ file->mv[n] = var_get_missing_values (src_var);
subcase_add_var (&file->src, src_var, SC_ASCEND);
subcase_add_var (&file->dst, dst_var, SC_ASCEND);
}
}
}
- proc.output = autopaging_writer_create (dict_get_next_value_idx (proc.dict));
+ proc.output = autopaging_writer_create (dict_get_proto (proc.dict));
taint = taint_clone (casewriter_get_taint (proc.output));
/* Set up case matcher. */
proc.matcher = case_matcher_create ();
- for (i = 0; i < proc.n_files; i++)
+ for (size_t i = 0; i < proc.n_files; i++)
{
struct comb_file *file = &proc.files[i];
if (file->reader == NULL)
if (active_file == NULL)
{
proc_discard_output (ds);
- file->reader = active_file = proc_open (ds);
+ file->reader = active_file = proc_open_filtering (ds, false);
}
else
file->reader = casereader_clone (active_file);
if (active_file != NULL)
proc_commit (ds);
- proc_set_active_file (ds, casewriter_make_reader (proc.output), proc.dict);
+ dataset_set_dict (ds, proc.dict);
+ dataset_set_source (ds, casewriter_make_reader (proc.output));
proc.dict = NULL;
proc.output = NULL;
free_comb_proc (&proc);
+ free (first_name);
+ free (last_name);
+
return taint_destroy (taint) ? CMD_SUCCESS : CMD_CASCADING_FAILURE;
error:
proc_commit (ds);
free_comb_proc (&proc);
taint_destroy (taint);
+ free (first_name);
+ free (last_name);
return CMD_CASCADING_FAILURE;
}
-/* Merge the dictionary for file F into master dictionary M. */
+/* Merges the dictionary for file F into the master dictionary for PROC.
+
+ The master dictionary takes F's file label if it has none of its own, a
+ warning is issued if the two dictionaries' encodings differ, and F's
+ documents, if any, are appended after the master's.
+
+ Each non-scratch variable in F that the master lacks is cloned into the
+ master, and F's index within PROC->files is appended to PROC->var_sources.
+ For a variable that the master already has, the widths must match;
+ otherwise an error is issued, followed by notes, located through LEXER at
+ each file's specification, that describe the variable in the earlier file
+ and in F, and false is returned. If the widths match and only F's
+ variable has a label, the master variable adopts it.
+
+ Returns true if successful, false on a type or width conflict. */
static bool
-merge_dictionary (struct dictionary *const m, struct comb_file *f)
+merge_dictionary (struct comb_proc *proc, struct lexer *lexer,
+ struct comb_file *f)
{
+ struct dictionary *m = proc->dict;
struct dictionary *d = f->dict;
- const char *d_docs, *m_docs;
- int i;
- const char *file_encoding;
if (dict_get_label (m) == NULL)
dict_set_label (m, dict_get_label (d));
- d_docs = dict_get_documents (d);
- m_docs = dict_get_documents (m);
-
-
- /* If the input files have different encodings, then
+ /* FIXME: If the input files have different encodings, then
+ the result is undefined.
+ The correct thing to do would be to convert to an encoding
+ which can cope with all the input files (e.g. UTF-8).
*/
- file_encoding = dict_get_encoding (f->dict);
- if ( file_encoding != NULL)
- {
- if ( dict_get_encoding (m) == NULL)
- dict_set_encoding (m, file_encoding);
- else if ( 0 != strcmp (file_encoding, dict_get_encoding (m)))
- {
- msg (MW,
- _("Combining files with incompatible encodings. String data may not be represented correctly."));
- }
- }
+ if (strcmp (dict_get_encoding (f->dict), dict_get_encoding (m)))
+ msg (MW, _("Combining files with incompatible encodings. String data may "
+ "not be represented correctly."));
- if (d_docs != NULL)
+ const struct string_array *d_docs = dict_get_documents (d);
+ const struct string_array *m_docs = dict_get_documents (m);
+ if (d_docs)
{
- if (m_docs == NULL)
+ if (!m_docs)
dict_set_documents (m, d_docs);
else
{
- char *new_docs = xasprintf ("%s%s", m_docs, d_docs);
- dict_set_documents (m, new_docs);
- free (new_docs);
+ size_t n = m_docs->n + d_docs->n;
+ struct string_array new_docs = {
+ .strings = xmalloc (n * sizeof *new_docs.strings),
+ };
+ for (size_t i = 0; i < m_docs->n; i++)
+ new_docs.strings[new_docs.n++] = m_docs->strings[i];
+ for (size_t i = 0; i < d_docs->n; i++)
+ new_docs.strings[new_docs.n++] = d_docs->strings[i];
+
+ dict_set_documents (m, &new_docs);
+
+ free (new_docs.strings);
}
}
- for (i = 0; i < dict_get_var_cnt (d); i++)
+ for (size_t i = 0; i < dict_get_n_vars (d); i++)
{
struct variable *dv = dict_get_var (d, i);
struct variable *mv = dict_lookup_var (m, var_get_name (dv));
if (dict_class_from_id (var_get_name (dv)) == DC_SCRATCH)
continue;
- if (mv != NULL)
+ if (!mv)
+ {
+ mv = dict_clone_var_assert (m, dv);
+ if (proc->n_var_sources >= proc->allocated_var_sources)
+ proc->var_sources = x2nrealloc (proc->var_sources,
+ &proc->allocated_var_sources,
+ sizeof *proc->var_sources);
+ proc->var_sources[proc->n_var_sources++] = f - proc->files;
+ }
+ else
{
if (var_get_width (mv) != var_get_width (dv))
{
const char *var_name = var_get_name (dv);
- const char *file_name = fh_get_name (f->handle);
- struct string s = DS_EMPTY_INITIALIZER;
- ds_put_format (&s,
- _("Variable %s in file %s has different "
- "type or width from the same variable in "
- "earlier file."),
- var_name, file_name);
- ds_put_cstr (&s, " ");
- if (var_is_numeric (dv))
- ds_put_format (&s, _("In file %s, %s is numeric."),
- file_name, var_name);
- else
- ds_put_format (&s, _("In file %s, %s is a string variable "
- "with width %d."),
- file_name, var_name, var_get_width (dv));
- ds_put_cstr (&s, " ");
- if (var_is_numeric (mv))
- ds_put_format (&s, _("In an earlier file, %s was numeric."),
- var_name);
- else
- ds_put_format (&s, _("In an earlier file, %s was a string "
- "variable with width %d."),
- var_name, var_get_width (mv));
- msg (SE, ds_cstr (&s));
- ds_destroy (&s);
+ msg (SE, _("Variable %s has different type or width in different "
+ "files."), var_name);
+
+ for (size_t j = 0; j < 2; j++)
+ {
+ const struct variable *ev = !j ? mv : dv;
+ const struct comb_file *ef
+ = !j ? &proc->files[proc->var_sources[var_get_dict_index (mv)]] : f;
+ const char *fn = ef->handle ? fh_get_name (ef->handle) : "*";
+
+ if (var_is_numeric (ev))
+ lex_ofs_msg (lexer, SN, ef->start_ofs, ef->end_ofs,
+ _("In file %s, %s is numeric."),
+ fn, var_name);
+ else
+ lex_ofs_msg (lexer, SN, ef->start_ofs, ef->end_ofs,
+ _("In file %s, %s is a string with width %d."),
+ fn, var_name, var_get_width (ev));
+ }
+
return false;
}
if (var_get_label (dv) && !var_get_label (mv))
var_set_label (mv, var_get_label (dv));
}
- else
- mv = dict_clone_var_assert (m, dv, var_get_name (dv));
}
return true;
}
-/* If VAR_NAME is a non-empty string, attempts to create a
+/* If VAR_NAME is non-NULL, attempts to create a
variable named VAR_NAME, with format F1.0, in DICT, and stores
a pointer to the variable in *VAR. Returns true if
successful, false if the variable name is a duplicate (in
which case a message saying that the variable specified on the
- given SUBCOMMAND is a duplicate is emitted). Also returns
- true, without doing anything, if VAR_NAME is null or empty. */
+ given SUBCOMMAND is a duplicate is emitted).
+
+ Does nothing and returns true if VAR_NAME is null. */
static bool
-create_flag_var (const char *subcommand, const char *var_name,
+create_flag_var (struct lexer *lexer, const char *subcommand,
+ const char *var_name, int var_ofs,
struct dictionary *dict, struct variable **var)
{
- if (var_name[0] != '\0')
+ if (var_name != NULL)
{
struct fmt_spec format = fmt_for_output (FMT_F, 1, 0);
*var = dict_create_var (dict, var_name, 0);
if (*var == NULL)
{
- msg (SE, _("Variable name %s specified on %s subcommand "
- "duplicates an existing variable name."),
- subcommand, var_name);
+ lex_ofs_error (lexer, var_ofs, var_ofs,
+ _("Variable name %s specified on %s subcommand "
+ "duplicates an existing variable name."),
+ var_name, subcommand);
return false;
}
var_set_both_formats (*var, &format);
static void
close_all_comb_files (struct comb_proc *proc)
{
- size_t i;
-
- for (i = 0; i < proc->n_files; i++)
+ for (size_t i = 0; i < proc->n_files; i++)
{
struct comb_file *file = &proc->files[i];
- subcase_destroy (&file->by_vars);
- subcase_destroy (&file->src);
- subcase_destroy (&file->dst);
+ subcase_uninit (&file->by_vars);
+ subcase_uninit (&file->src);
+ subcase_uninit (&file->dst);
+ free (file->mv);
fh_unref (file->handle);
- dict_destroy (file->dict);
+ dict_unref (file->dict);
casereader_destroy (file->reader);
case_unref (file->data);
+ free (file->in_name);
}
free (proc->files);
proc->files = NULL;
free_comb_proc (struct comb_proc *proc)
{
close_all_comb_files (proc);
- dict_destroy (proc->dict);
+ dict_unref (proc->dict);
casewriter_destroy (proc->output);
case_matcher_destroy (proc->matcher);
- subcase_destroy (&proc->by_vars);
+ if (proc->prev_BY)
+ {
+ caseproto_destroy_values (subcase_get_proto (&proc->by_vars),
+ proc->prev_BY);
+ free (proc->prev_BY);
+ }
+ subcase_uninit (&proc->by_vars);
case_unref (proc->buffered_case);
- free (proc->prev_BY);
+ free (proc->var_sources);
}
\f
static bool scan_table (struct comb_file *, union value by[]);
static struct ccase *create_output_case (const struct comb_proc *);
static void apply_case (const struct comb_file *, struct ccase *);
-static void apply_file_case_and_advance (struct comb_file *, struct ccase *,
- union value by[]);
+static void apply_nonmissing_case (const struct comb_file *, struct ccase *);
+static void advance_file (struct comb_file *, union value by[]);
static void output_case (struct comb_proc *, struct ccase *, union value by[]);
static void output_buffered_case (struct comb_proc *);
union value *by;
while (case_matcher_match (proc->matcher, &by))
- {
- size_t i;
-
- for (i = 0; i < proc->n_files; i++)
- {
- struct comb_file *file = &proc->files[i];
- while (file->is_minimal)
- {
- struct ccase *output = create_output_case (proc);
- apply_file_case_and_advance (file, output, by);
- output_case (proc, output, by);
- }
- }
- }
+ for (size_t i = 0; i < proc->n_files; i++)
+ {
+ struct comb_file *file = &proc->files[i];
+ while (file->is_minimal)
+ {
+ struct ccase *output = create_output_case (proc);
+ apply_case (file, output);
+ advance_file (file, by);
+ output_case (proc, output, by);
+ }
+ }
output_buffered_case (proc);
}
while (case_matcher_match (proc->matcher, &by))
{
- struct ccase *output;
- size_t i;
-
- output = create_output_case (proc);
- for (i = proc->n_files; i-- > 0; )
+ struct ccase *output = create_output_case (proc);
+ for (size_t i = proc->n_files; i-- > 0;)
{
struct comb_file *file = &proc->files[i];
if (file->type == COMB_FILE)
{
if (file->is_minimal)
- apply_file_case_and_advance (file, output, NULL);
+ {
+ apply_case (file, output);
+ advance_file (file, NULL);
+ }
}
else
{
for (first = &proc->files[0]; ; first++)
if (first->is_minimal)
break;
- apply_file_case_and_advance (first, output, by);
+ apply_case (first, output);
+ advance_file (first, by);
/* Read additional cases and update the output case from
them. (Don't update the output case from any duplicate
file < &proc->files[proc->n_files]; file++)
{
while (file->is_minimal)
- apply_file_case_and_advance (file, output, by);
+ {
+ apply_nonmissing_case (file, output);
+ advance_file (file, by);
+ }
}
casewriter_write (proc->output, output);
while (first->is_minimal)
{
output = create_output_case (proc);
- apply_file_case_and_advance (first, output, by);
+ apply_case (first, output);
+ advance_file (first, by);
casewriter_write (proc->output, output);
}
}
static struct ccase *
create_output_case (const struct comb_proc *proc)
{
- size_t n_vars = dict_get_var_cnt (proc->dict);
- struct ccase *output;
- size_t i;
-
- output = case_create (dict_get_next_value_idx (proc->dict));
- for (i = 0; i < n_vars; i++)
+ size_t n_vars = dict_get_n_vars (proc->dict);
+ struct ccase *output = case_create (dict_get_proto (proc->dict));
+ for (size_t i = 0; i < n_vars; i++)
{
struct variable *v = dict_get_var (proc->dict, i);
value_set_missing (case_data_rw (output, v), var_get_width (v));
}
- for (i = 0; i < proc->n_files; i++)
+ for (size_t i = 0; i < proc->n_files; i++)
{
struct comb_file *file = &proc->files[i];
if (file->in_var != NULL)
- case_data_rw (output, file->in_var)->f = false;
+ *case_num_rw (output, file->in_var) = false;
}
return output;
}
+/* If FILE has an IN variable, sets that variable to true in OUTPUT. */
+static void
+mark_file_used (const struct comb_file *file, struct ccase *output)
+{
+ if (file->in_var != NULL)
+ *case_num_rw (output, file->in_var) = true;
+}
+
/* Copies the data from FILE's case into output case OUTPUT.
If FILE has an IN variable, then it is set to 1 in OUTPUT. */
static void
apply_case (const struct comb_file *file, struct ccase *output)
{
subcase_copy (&file->src, file->data, &file->dst, output);
- if (file->in_var != NULL)
- case_data_rw (output, file->in_var)->f = true;
+ mark_file_used (file, output);
+}
+
+/* Copies the data from FILE's current case into output case OUTPUT, one
+ field at a time, skipping values that are missing or all spaces.
+
+ A source value is skipped if it is missing according to the corresponding
+ entry in FILE->mv, or if its field has nonzero width and the value
+ consists entirely of spaces. A skipped field is not written, so OUTPUT
+ keeps whatever value it already held there.
+
+ If FILE has an IN variable, then it is set to 1 in OUTPUT. */
+static void
+apply_nonmissing_case (const struct comb_file *file, struct ccase *output)
+{
+ for (size_t i = 0; i < subcase_get_n_fields (&file->src); i++)
+ {
+ const struct subcase_field *src_field = &file->src.fields[i];
+ const struct subcase_field *dst_field = &file->dst.fields[i];
+ const union value *src_value
+ = case_data_idx (file->data, src_field->case_index);
+ int width = src_field->width;
+
+ if (!mv_is_value_missing (file->mv[i], src_value)
+ && !(width > 0 && value_is_spaces (src_value, width)))
+ value_copy (case_data_rw_idx (output, dst_field->case_index),
+ src_value, width);
+ }
+ mark_file_used (file, output);
}
-/* Like apply_case() above, but also advances FILE to its next
- case. Also, if BY is nonnull, then FILE's is_minimal member
- is updated based on whether the new case's BY values still
- match those in BY. */
+/* Advances FILE to its next case. If BY is nonnull, then FILE's is_minimal
+ member is updated based on whether the new case's BY values still match
+ those in BY. */
static void
-apply_file_case_and_advance (struct comb_file *file, struct ccase *output,
- union value by[])
+advance_file (struct comb_file *file, union value by[])
{
- apply_case (file, output);
case_unref (file->data);
file->data = casereader_read (file->reader);
if (by)
{
new_BY = !subcase_equal_xx (&proc->by_vars, proc->prev_BY, by);
if (proc->last != NULL)
- case_data_rw (proc->buffered_case, proc->last)->f = new_BY;
+ *case_num_rw (proc->buffered_case, proc->last) = new_BY;
casewriter_write (proc->output, proc->buffered_case);
}
else
proc->buffered_case = output;
if (proc->first != NULL)
- case_data_rw (proc->buffered_case, proc->first)->f = new_BY;
+ *case_num_rw (proc->buffered_case, proc->first) = new_BY;
if (new_BY)
{
- size_t n = (subcase_get_n_values (&proc->by_vars)
- * sizeof (union value));
+ size_t n_values = subcase_get_n_fields (&proc->by_vars);
+ const struct caseproto *proto = subcase_get_proto (&proc->by_vars);
if (proc->prev_BY == NULL)
- proc->prev_BY = xmalloc (n);
- memcpy (proc->prev_BY, by, n);
+ {
+ proc->prev_BY = xmalloc (n_values * sizeof *proc->prev_BY);
+ caseproto_init_values (proto, proc->prev_BY);
+ }
+ caseproto_copy (subcase_get_proto (&proc->by_vars), 0, n_values,
+ proc->prev_BY, by);
}
}
}
if (proc->prev_BY != NULL)
{
if (proc->last != NULL)
- case_data_rw (proc->buffered_case, proc->last)->f = 1.0;
+ *case_num_rw (proc->buffered_case, proc->last) = 1.0;
casewriter_write (proc->output, proc->buffered_case);
proc->buffered_case = NULL;
}