/* PSPP - a program for statistical analysis.
- Copyright (C) 1997-9, 2000, 2006, 2007, 2008 Free Software Foundation, Inc.
+ Copyright (C) 1997-9, 2000, 2006, 2007, 2008, 2009, 2010 Free Software Foundation, Inc.
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
struct file_handle *handle; /* Input file handle. */
struct dictionary *dict; /* Input file dictionary. */
struct casereader *reader; /* Input data source. */
- struct ccase data; /* The current input case. */
+ struct ccase *data; /* The current input case. */
bool is_minimal; /* Does 'data' have minimum BY values across
all input files? */
bool is_sorted; /* Is file presorted on the BY variables? */
members used. */
struct variable *first; /* Variable specified on FIRST (if any). */
struct variable *last; /* Variable specified on LAST (if any). */
- struct ccase buffered_case; /* Case ready for output except that we don't
- know the value for the LAST variable yet. */
+ struct ccase *buffered_case; /* Case ready for output except that we don't
+ know the value for the LAST var yet. */
union value *prev_BY; /* Values of BY vars in buffered_case. */
};
subcase_init_empty (&proc.by_vars);
proc.first = NULL;
proc.last = NULL;
- case_nullify (&proc.buffered_case);
+ proc.buffered_case = NULL;
proc.prev_BY = NULL;
dict_set_case_limit (proc.dict, dict_get_case_limit (dataset_dict (ds)));
- lex_match (lexer, '/');
+ lex_match (lexer, T_SLASH);
for (;;)
{
struct comb_file *file;
}
else
break;
- lex_match (lexer, '=');
+ lex_match (lexer, T_EQUALS);
if (proc.n_files >= allocated_files)
proc.files = x2nrealloc (proc.files, &allocated_files,
file->handle = NULL;
file->dict = NULL;
file->reader = NULL;
- case_nullify (&file->data);
+ file->data = NULL;
file->is_sorted = true;
file->in_name[0] = '\0';
file->in_var = NULL;
- if (lex_match (lexer, '*'))
+ if (lex_match (lexer, T_ASTERISK))
{
if (!proc_has_active_file (ds))
{
goto error;
}
- while (lex_match (lexer, '/'))
+ while (lex_match (lexer, T_SLASH))
if (lex_match_id (lexer, "RENAME"))
{
if (!parse_dict_rename (lexer, file->dict))
}
else if (lex_match_id (lexer, "IN"))
{
- lex_match (lexer, '=');
+ lex_match (lexer, T_EQUALS);
if (lex_token (lexer) != T_ID)
{
lex_error (lexer, NULL);
"TABLE."));
goto error;
}
- strcpy (file->in_name, lex_tokid (lexer));
+ strcpy (file->in_name, lex_tokcstr (lexer));
lex_get (lexer);
}
else if (lex_match_id (lexer, "SORT"))
merge_dictionary (proc.dict, file);
}
- while (lex_token (lexer) != '.')
+ while (lex_token (lexer) != T_ENDCMD)
{
if (lex_match (lexer, T_BY))
{
}
saw_by = true;
- lex_match (lexer, '=');
+ lex_match (lexer, T_EQUALS);
if (!parse_sort_criteria (lexer, proc.dict, &proc.by_vars,
&by_vars, NULL))
goto error;
struct comb_file *file = &proc.files[i];
size_t j;
- for (j = 0; j < subcase_get_n_values (&proc.by_vars); j++)
+ for (j = 0; j < subcase_get_n_fields (&proc.by_vars); j++)
{
const char *name = var_get_name (by_vars[j]);
struct variable *var = dict_lookup_var (file->dict, name);
goto error;
}
- lex_match (lexer, '=');
+ lex_match (lexer, T_EQUALS);
if (!lex_force_id (lexer))
goto error;
- strcpy (first_name, lex_tokid (lexer));
+ strcpy (first_name, lex_tokcstr (lexer));
lex_get (lexer);
}
else if (command != COMB_UPDATE && lex_match_id (lexer, "LAST"))
goto error;
}
- lex_match (lexer, '=');
+ lex_match (lexer, T_EQUALS);
if (!lex_force_id (lexer))
goto error;
- strcpy (last_name, lex_tokid (lexer));
+ strcpy (last_name, lex_tokcstr (lexer));
lex_get (lexer);
}
else if (lex_match_id (lexer, "MAP"))
goto error;
}
- if (!lex_match (lexer, '/') && lex_token (lexer) != '.')
+ if (!lex_match (lexer, T_SLASH) && lex_token (lexer) != T_ENDCMD)
{
lex_end_of_command (lexer);
goto error;
}
if (n_tables)
{
- msg (SE, _("BY is required when TABLE is specified."));
+ msg (SE, _("BY is required when %s is specified."), "TABLE");
goto error;
}
if (saw_sort)
{
- msg (SE, _("BY is required when SORT is specified."));
+ msg (SE, _("BY is required when %s is specified."), "SORT");
goto error;
}
}
}
}
- proc.output = autopaging_writer_create (dict_get_next_value_idx (proc.dict));
+ proc.output = autopaging_writer_create (dict_get_proto (proc.dict));
taint = taint_clone (casewriter_get_taint (proc.output));
/* Set up case matcher. */
if (!file->is_sorted)
file->reader = sort_execute (file->reader, &file->by_vars);
taint_propagate (casereader_get_taint (file->reader), taint);
- casereader_read (file->reader, &file->data);
+ file->data = casereader_read (file->reader);
if (file->type == COMB_FILE)
case_matcher_add_input (proc.matcher, &file->by_vars,
&file->data, &file->is_minimal);
struct dictionary *d = f->dict;
const char *d_docs, *m_docs;
int i;
+ const char *file_encoding;
if (dict_get_label (m) == NULL)
dict_set_label (m, dict_get_label (d));
d_docs = dict_get_documents (d);
m_docs = dict_get_documents (m);
+
+
+ /* FIXME: If the input files have different encodings, then
+ the result is undefined.
+ The correct thing to do would be to convert to an encoding
+ which can cope with all the input files (e.g. UTF-8).
+ */
+ file_encoding = dict_get_encoding (f->dict);
+ if ( file_encoding != NULL)
+ {
+ if ( dict_get_encoding (m) == NULL)
+ dict_set_encoding (m, file_encoding);
+ else if ( 0 != strcmp (file_encoding, dict_get_encoding (m)))
+ {
+ msg (MW,
+ _("Combining files with incompatible encodings. String data may not be represented correctly."));
+ }
+ }
+
if (d_docs != NULL)
{
if (m_docs == NULL)
ds_put_format (&s, _("In an earlier file, %s was a string "
"variable with width %d."),
var_name, var_get_width (mv));
- msg (SE, ds_cstr (&s));
+ msg (SE, "%s", ds_cstr (&s));
ds_destroy (&s);
return false;
}
var_set_label (mv, var_get_label (dv));
}
else
- mv = dict_clone_var_assert (m, dv, var_get_name (dv));
+ mv = dict_clone_var_assert (m, dv);
}
return true;
fh_unref (file->handle);
dict_destroy (file->dict);
casereader_destroy (file->reader);
- case_destroy (&file->data);
+ case_unref (file->data);
}
free (proc->files);
proc->files = NULL;
dict_destroy (proc->dict);
casewriter_destroy (proc->output);
case_matcher_destroy (proc->matcher);
+ if (proc->prev_BY)
+ {
+ caseproto_destroy_values (subcase_get_proto (&proc->by_vars),
+ proc->prev_BY);
+ free (proc->prev_BY);
+ }
subcase_destroy (&proc->by_vars);
- case_destroy (&proc->buffered_case);
- free (proc->prev_BY);
+ case_unref (proc->buffered_case);
}
\f
static bool scan_table (struct comb_file *, union value by[]);
-static void create_output_case (const struct comb_proc *, struct ccase *);
+static struct ccase *create_output_case (const struct comb_proc *);
static void apply_case (const struct comb_file *, struct ccase *);
static void apply_file_case_and_advance (struct comb_file *, struct ccase *,
union value by[]);
while (case_matcher_match (proc->matcher, &by))
{
- struct ccase output;
size_t i;
for (i = 0; i < proc->n_files; i++)
struct comb_file *file = &proc->files[i];
while (file->is_minimal)
{
- create_output_case (proc, &output);
- apply_file_case_and_advance (file, &output, by);
- output_case (proc, &output, by);
+ struct ccase *output = create_output_case (proc);
+ apply_file_case_and_advance (file, output, by);
+ output_case (proc, output, by);
}
}
}
while (case_matcher_match (proc->matcher, &by))
{
- struct ccase output;
+ struct ccase *output;
size_t i;
- create_output_case (proc, &output);
+ output = create_output_case (proc);
for (i = proc->n_files; i-- > 0; )
{
struct comb_file *file = &proc->files[i];
if (file->type == COMB_FILE)
{
if (file->is_minimal)
- apply_file_case_and_advance (file, &output, NULL);
+ apply_file_case_and_advance (file, output, NULL);
}
else
{
if (scan_table (file, by))
- apply_case (file, &output);
+ apply_case (file, output);
}
}
- output_case (proc, &output, by);
+ output_case (proc, output, by);
}
output_buffered_case (proc);
}
while (case_matcher_match (proc->matcher, &by))
{
struct comb_file *first, *file;
- struct ccase output;
+ struct ccase *output;
/* Find first nonnull case in array and make an output case
from it. */
- create_output_case (proc, &output);
+ output = create_output_case (proc);
for (first = &proc->files[0]; ; first++)
if (first->is_minimal)
break;
- apply_file_case_and_advance (first, &output, by);
+ apply_file_case_and_advance (first, output, by);
/* Read additional cases and update the output case from
them. (Don't update the output case from any duplicate
file < &proc->files[proc->n_files]; file++)
{
while (file->is_minimal)
- apply_file_case_and_advance (file, &output, by);
+ apply_file_case_and_advance (file, output, by);
}
- casewriter_write (proc->output, &output);
+ casewriter_write (proc->output, output);
/* Write duplicate cases in the master file directly to the
output. */
n_duplicates++;
while (first->is_minimal)
{
- create_output_case (proc, &output);
- apply_file_case_and_advance (first, &output, by);
- casewriter_write (proc->output, &output);
+ output = create_output_case (proc);
+ apply_file_case_and_advance (first, output, by);
+ casewriter_write (proc->output, output);
}
}
}
static bool
scan_table (struct comb_file *file, union value by[])
{
- while (!case_is_null (&file->data))
+ while (file->data != NULL)
{
- int cmp = subcase_compare_3way_xc (&file->by_vars, by, &file->data);
+ int cmp = subcase_compare_3way_xc (&file->by_vars, by, file->data);
if (cmp > 0)
{
- case_destroy (&file->data);
- casereader_read (file->reader, &file->data);
+ case_unref (file->data);
+ file->data = casereader_read (file->reader);
}
else
return cmp == 0;
return false;
}
-/* Creates OUTPUT as an output case for PROC, by initializing each of
- its values to system-missing or blanks, except that the values
- of IN variables are set to 0. */
-static void
-create_output_case (const struct comb_proc *proc, struct ccase *output)
+/* Creates and returns an output case for PROC, initializing each
+ of its values to system-missing or blanks, except that the
+ values of IN variables are set to 0. */
+static struct ccase *
+create_output_case (const struct comb_proc *proc)
{
size_t n_vars = dict_get_var_cnt (proc->dict);
+ struct ccase *output;
size_t i;
- case_create (output, dict_get_next_value_idx (proc->dict));
+ output = case_create (dict_get_proto (proc->dict));
for (i = 0; i < n_vars; i++)
{
struct variable *v = dict_get_var (proc->dict, i);
if (file->in_var != NULL)
case_data_rw (output, file->in_var)->f = false;
}
+ return output;
}
/* Copies the data from FILE's case into output case OUTPUT.
static void
apply_case (const struct comb_file *file, struct ccase *output)
{
- subcase_copy (&file->src, &file->data, &file->dst, output);
+ subcase_copy (&file->src, file->data, &file->dst, output);
if (file->in_var != NULL)
case_data_rw (output, file->in_var)->f = true;
}
union value by[])
{
apply_case (file, output);
- case_destroy (&file->data);
- casereader_read (file->reader, &file->data);
+ case_unref (file->data);
+ file->data = casereader_read (file->reader);
if (by)
- file->is_minimal = (!case_is_null (&file->data)
- && subcase_equal_cx (&file->by_vars, &file->data, by));
+ file->is_minimal = (file->data != NULL
+ && subcase_equal_cx (&file->by_vars, file->data, by));
}
/* Writes OUTPUT, whose BY values have been extracted into BY, to
{
new_BY = !subcase_equal_xx (&proc->by_vars, proc->prev_BY, by);
if (proc->last != NULL)
- case_data_rw (&proc->buffered_case, proc->last)->f = new_BY;
- casewriter_write (proc->output, &proc->buffered_case);
+ case_data_rw (proc->buffered_case, proc->last)->f = new_BY;
+ casewriter_write (proc->output, proc->buffered_case);
}
else
new_BY = true;
- case_move (&proc->buffered_case, output);
+ proc->buffered_case = output;
if (proc->first != NULL)
- case_data_rw (&proc->buffered_case, proc->first)->f = new_BY;
+ case_data_rw (proc->buffered_case, proc->first)->f = new_BY;
if (new_BY)
{
- size_t n = (subcase_get_n_values (&proc->by_vars)
- * sizeof (union value));
+ size_t n_values = subcase_get_n_fields (&proc->by_vars);
+ const struct caseproto *proto = subcase_get_proto (&proc->by_vars);
if (proc->prev_BY == NULL)
- proc->prev_BY = xmalloc (n);
- memcpy (proc->prev_BY, by, n);
+ {
+ proc->prev_BY = xmalloc (n_values * sizeof *proc->prev_BY);
+ caseproto_init_values (proto, proc->prev_BY);
+ }
+ caseproto_copy (subcase_get_proto (&proc->by_vars), 0, n_values,
+ proc->prev_BY, by);
}
}
}
if (proc->prev_BY != NULL)
{
if (proc->last != NULL)
- case_data_rw (&proc->buffered_case, proc->last)->f = 1.0;
- casewriter_write (proc->output, &proc->buffered_case);
- case_nullify (&proc->buffered_case);
+ case_data_rw (proc->buffered_case, proc->last)->f = 1.0;
+ casewriter_write (proc->output, proc->buffered_case);
+ proc->buffered_case = NULL;
}
}