/* PSPP - a program for statistical analysis.
- Copyright (C) 1997-9, 2000, 2006, 2007, 2008, 2009, 2010, 2011 Free Software Foundation, Inc.
+ Copyright (C) 1997-9, 2000, 2006, 2007, 2008, 2009, 2010, 2011, 2012, 2013, 2014 Free Software Foundation, Inc.
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
#include "data/case.h"
#include "data/casereader.h"
#include "data/casewriter.h"
+#include "data/dataset.h"
#include "data/dictionary.h"
#include "data/format.h"
-#include "data/procedure.h"
#include "data/subcase.h"
#include "data/variable.h"
#include "language/command.h"
#include "language/lexer/variable-parser.h"
#include "language/stats/sort-criteria.h"
#include "libpspp/assertion.h"
+#include "libpspp/i18n.h"
#include "libpspp/message.h"
#include "libpspp/string-array.h"
#include "libpspp/taint.h"
/* Variables. */
struct subcase by_vars; /* BY variables in this input file. */
struct subcase src, dst; /* Data to copy to output; where to put it. */
+ const struct missing_values **mv; /* Each variable's missing values. */
/* Input files. */
struct file_handle *handle; /* Input file handle. */
proc.files = NULL;
proc.n_files = 0;
- proc.dict = dict_create ();
+ proc.dict = dict_create (get_default_encoding ());
proc.output = NULL;
proc.matcher = NULL;
subcase_init_empty (&proc.by_vars);
subcase_init_empty (&file->by_vars);
subcase_init_empty (&file->src);
subcase_init_empty (&file->dst);
+ file->mv = NULL;
file->handle = NULL;
file->dict = NULL;
file->reader = NULL;
if (lex_match (lexer, T_ASTERISK))
{
- if (!proc_has_active_file (ds))
+ if (!dataset_has_source (ds))
{
- msg (SE, _("Cannot specify the active file since no active "
- "file has been defined."));
+ msg (SE, _("Cannot specify the active dataset since none "
+ "has been defined."));
goto error;
}
if (proc_make_temporary_transformations_permanent (ds))
msg (SE, _("This command may not be used after TEMPORARY when "
- "the active file is an input source. "
+ "the active dataset is an input source. "
"Temporary transformations will be made permanent."));
file->dict = dict_clone (dataset_dict (ds));
}
else
{
- file->handle = fh_parse (lexer, FH_REF_FILE | FH_REF_SCRATCH);
+ file->handle = fh_parse (lexer, FH_REF_FILE, dataset_session (ds));
if (file->handle == NULL)
goto error;
- file->reader = any_reader_open (file->handle, &file->dict);
+ file->reader = any_reader_open_and_decode (file->handle, NULL,
+ &file->dict, NULL);
if (file->reader == NULL)
goto error;
}
saw_sort = true;
}
- merge_dictionary (proc.dict, file);
+ if (!merge_dictionary (proc.dict, file))
+ goto error;
}
while (lex_token (lexer) != T_ENDCMD)
msg (SE, _("File %s lacks BY variable %s."),
fh_get_name (file->handle), name);
else
- msg (SE, _("Active file lacks BY variable %s."), name);
+ msg (SE, _("Active dataset lacks BY variable %s."),
+ name);
ok = false;
}
}
{
if (command == COMB_UPDATE)
{
- msg (SE, _("The BY subcommand is required."));
+ lex_sbc_missing ("BY");
goto error;
}
if (n_tables)
size_t src_var_cnt = dict_get_var_cnt (file->dict);
size_t j;
+ file->mv = xnmalloc (src_var_cnt, sizeof *file->mv);
for (j = 0; j < src_var_cnt; j++)
{
struct variable *src_var = dict_get_var (file->dict, j);
var_get_name (src_var));
if (dst_var != NULL)
{
+ size_t n = subcase_get_n_fields (&file->src);
+ file->mv[n] = var_get_missing_values (src_var);
subcase_add_var (&file->src, src_var, SC_ASCEND);
subcase_add_var (&file->dst, dst_var, SC_ASCEND);
}
if (active_file == NULL)
{
proc_discard_output (ds);
- file->reader = active_file = proc_open (ds);
+ file->reader = active_file = proc_open_filtering (ds, false);
}
else
file->reader = casereader_clone (active_file);
if (active_file != NULL)
proc_commit (ds);
- proc_set_active_file (ds, casewriter_make_reader (proc.output), proc.dict);
+ dataset_set_dict (ds, proc.dict);
+ dataset_set_source (ds, casewriter_make_reader (proc.output));
proc.dict = NULL;
proc.output = NULL;
struct dictionary *d = f->dict;
const struct string_array *d_docs, *m_docs;
int i;
- const char *file_encoding;
if (dict_get_label (m) == NULL)
dict_set_label (m, dict_get_label (d));
The correct thing to do would be to convert to an encoding
which can cope with all the input files (eg UTF-8).
*/
- file_encoding = dict_get_encoding (f->dict);
- if ( file_encoding != NULL)
- {
- if ( dict_get_encoding (m) == NULL)
- dict_set_encoding (m, file_encoding);
- else if ( 0 != strcmp (file_encoding, dict_get_encoding (m)))
- {
- msg (MW,
- _("Combining files with incompatible encodings. String data may not be represented correctly."));
- }
- }
+ if ( 0 != strcmp (dict_get_encoding (f->dict), dict_get_encoding (m)))
+ msg (MW, _("Combining files with incompatible encodings. String data may "
+ "not be represented correctly."));
if (d_docs != NULL)
{
if (var_get_width (mv) != var_get_width (dv))
{
const char *var_name = var_get_name (dv);
- const char *file_name = fh_get_name (f->handle);
struct string s = DS_EMPTY_INITIALIZER;
+ const char *file_name;
+
+ file_name = f->handle ? fh_get_name (f->handle) : "*";
ds_put_format (&s,
_("Variable %s in file %s has different "
"type or width from the same variable in "
if (var_has_missing_values (dv) && !var_has_missing_values (mv))
var_set_missing_values (mv, var_get_missing_values (dv));
if (var_get_label (dv) && !var_get_label (mv))
- var_set_label (mv, var_get_label (dv), file_encoding, false);
+ var_set_label (mv, var_get_label (dv));
}
else
mv = dict_clone_var_assert (m, dv);
subcase_destroy (&file->by_vars);
subcase_destroy (&file->src);
subcase_destroy (&file->dst);
+ free (file->mv);
fh_unref (file->handle);
- dict_destroy (file->dict);
+ dict_unref (file->dict);
casereader_destroy (file->reader);
case_unref (file->data);
free (file->in_name);
free_comb_proc (struct comb_proc *proc)
{
close_all_comb_files (proc);
- dict_destroy (proc->dict);
+ dict_unref (proc->dict);
casewriter_destroy (proc->output);
case_matcher_destroy (proc->matcher);
if (proc->prev_BY)
static bool scan_table (struct comb_file *, union value by[]);
static struct ccase *create_output_case (const struct comb_proc *);
static void apply_case (const struct comb_file *, struct ccase *);
-static void apply_file_case_and_advance (struct comb_file *, struct ccase *,
- union value by[]);
+static void apply_nonmissing_case (const struct comb_file *, struct ccase *);
+static void advance_file (struct comb_file *, union value by[]);
static void output_case (struct comb_proc *, struct ccase *, union value by[]);
static void output_buffered_case (struct comb_proc *);
while (file->is_minimal)
{
struct ccase *output = create_output_case (proc);
- apply_file_case_and_advance (file, output, by);
+ apply_case (file, output);
+ advance_file (file, by);
output_case (proc, output, by);
}
}
if (file->type == COMB_FILE)
{
if (file->is_minimal)
- apply_file_case_and_advance (file, output, NULL);
+ {
+ apply_case (file, output);
+ advance_file (file, NULL);
+ }
}
else
{
for (first = &proc->files[0]; ; first++)
if (first->is_minimal)
break;
- apply_file_case_and_advance (first, output, by);
+ apply_case (first, output);
+ advance_file (first, by);
/* Read additional cases and update the output case from
them. (Don't update the output case from any duplicate
file < &proc->files[proc->n_files]; file++)
{
while (file->is_minimal)
- apply_file_case_and_advance (file, output, by);
+ {
+ apply_nonmissing_case (file, output);
+ advance_file (file, by);
+ }
}
casewriter_write (proc->output, output);
while (first->is_minimal)
{
output = create_output_case (proc);
- apply_file_case_and_advance (first, output, by);
+ apply_case (first, output);
+ advance_file (first, by);
casewriter_write (proc->output, output);
}
}
return output;
}
+/* Records in OUTPUT that FILE contributed to it: if FILE has an IN
+   variable, that variable's numeric value in OUTPUT is set to true.
+   Does nothing when FILE has no IN variable. */
+static void
+mark_file_used (const struct comb_file *file, struct ccase *output)
+{
+  if (file->in_var == NULL)
+    return;
+
+  case_data_rw (output, file->in_var)->f = true;
+}
+
/* Copies the data from FILE's case into output case OUTPUT.
If FILE has an IN variable, then it is set to 1 in OUTPUT. */
static void
apply_case (const struct comb_file *file, struct ccase *output)
{
subcase_copy (&file->src, file->data, &file->dst, output);
- if (file->in_var != NULL)
- case_data_rw (output, file->in_var)->f = true;
+ /* Flag FILE's IN variable, if it has one, through the helper that
+    apply_nonmissing_case() also uses. */
+ mark_file_used (file, output);
+}
+
+/* Updates output case OUTPUT from FILE's current case, one field at a
+   time.  A field is left untouched in OUTPUT when its source value is
+   missing according to that variable's missing values (FILE->mv), or
+   when it is a string value consisting entirely of spaces.
+
+   If FILE has an IN variable, then it is set to 1 in OUTPUT. */
+static void
+apply_nonmissing_case (const struct comb_file *file, struct ccase *output)
+{
+  size_t idx;
+
+  for (idx = 0; idx < subcase_get_n_fields (&file->src); idx++)
+    {
+      const struct subcase_field *from = &file->src.fields[idx];
+      const struct subcase_field *to = &file->dst.fields[idx];
+      const union value *value = case_data_idx (file->data, from->case_index);
+      int width = from->width;
+
+      /* Missing by the variable's own definition: keep OUTPUT's value. */
+      if (mv_is_value_missing (file->mv[idx], value, MV_ANY))
+        continue;
+
+      /* Only strings (width > 0) can be all spaces. */
+      if (width > 0 && value_is_spaces (value, width))
+        continue;
+
+      value_copy (case_data_rw_idx (output, to->case_index), value, width);
+    }
+
+  mark_file_used (file, output);
}
-/* Like apply_case() above, but also advances FILE to its next
- case. Also, if BY is nonnull, then FILE's is_minimal member
- is updated based on whether the new case's BY values still
- match those in BY. */
+/* Advances FILE to its next case. If BY is nonnull, then FILE's is_minimal
+ member is updated based on whether the new case's BY values still match
+ those in BY. */
static void
-apply_file_case_and_advance (struct comb_file *file, struct ccase *output,
- union value by[])
+advance_file (struct comb_file *file, union value by[])
{
- apply_case (file, output);
case_unref (file->data);
file->data = casereader_read (file->reader);
if (by)