-/* PSPP - computes sample statistics.
- Copyright (C) 1997-9, 2000, 2006 Free Software Foundation, Inc.
- Written by Ben Pfaff <blp@gnu.org>.
+/* PSPP - a program for statistical analysis.
+ Copyright (C) 1997-9, 2000, 2006, 2007 Free Software Foundation, Inc.
- This program is free software; you can redistribute it and/or
- modify it under the terms of the GNU General Public License as
- published by the Free Software Foundation; either version 2 of the
- License, or (at your option) any later version.
+ This program is free software: you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation, either version 3 of the License, or
+ (at your option) any later version.
- This program is distributed in the hope that it will be useful, but
- WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- General Public License for more details.
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
You should have received a copy of the GNU General Public License
- along with this program; if not, write to the Free Software
- Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
- 02110-1301, USA. */
+ along with this program. If not, see <http://www.gnu.org/licenses/>. */
#include <config.h>
#include <data/any-reader.h>
#include <data/any-writer.h>
-#include <data/case-sink.h>
-#include <data/case-source.h>
#include <data/case.h>
-#include <data/casefile.h>
-#include <data/fastfile.h>
+#include <data/casereader.h>
+#include <data/casewriter.h>
+#include <data/format.h>
#include <data/dictionary.h>
#include <data/por-file-writer.h>
#include <data/procedure.h>
#include <data/settings.h>
-#include <data/storage-stream.h>
#include <data/sys-file-writer.h>
#include <data/transformations.h>
#include <data/value-labels.h>
#include <libpspp/compiler.h>
#include <libpspp/hash.h>
#include <libpspp/message.h>
-#include <libpspp/message.h>
#include <libpspp/misc.h>
#include <libpspp/str.h>
+#include <libpspp/taint.h>
#include "gettext.h"
#define _(msgid) gettext (msgid)
const struct ccase *, struct ccase *);
static void destroy_case_map (struct case_map *);
-static bool parse_dict_trim (struct dictionary *);
+static bool parse_dict_trim (struct lexer *, struct dictionary *);
\f
/* Reading system and portable files. */
/* Type of command handled by parse_read_command. */
-enum reader_command
+enum reader_command
{
GET_CMD, /* Passed by cmd_get. */
IMPORT_CMD /* Passed by cmd_import; enables the TYPE subcommand. */
};
-/* Case reader input program. */
-struct case_reader_pgm
- {
- struct any_reader *reader; /* File reader. */
- struct case_map *map; /* Map from file dict to active file dict. */
- struct ccase bounce; /* Bounce buffer. */
- };
-
-static const struct case_source_class case_reader_source_class;
-
-static void case_reader_pgm_free (struct case_reader_pgm *);
+static void get_translate_case (const struct ccase *, struct ccase *,
+ void *map_);
+static bool get_destroy_case_map (void *map_);
/* Parses a GET or IMPORT command.

   Reads FILE (and, when TYPE is IMPORT_CMD, TYPE) subcommands from
   LEXER, opens the named file with any_reader_open, and then hands
   every remaining subcommand up to the terminating '.' to
   parse_dict_trim.  If finish_case_map yields a case map, the
   reader is wrapped in a translator that applies that map.

   On success the reader and dictionary are passed to
   proc_set_active_file for DS and CMD_SUCCESS is returned.  On any
   failure the reader and the dictionary (if one was obtained) are
   destroyed and CMD_CASCADING_FAILURE is returned. */
static int
-parse_read_command (enum reader_command type)
+parse_read_command (struct lexer *lexer, struct dataset *ds, enum reader_command type)
{
- struct case_reader_pgm *pgm = NULL;
+ struct casereader *reader = NULL;
struct file_handle *fh = NULL;
struct dictionary *dict = NULL;
+ struct case_map *map = NULL;
for (;;)
{
- lex_match ('/');
+ lex_match (lexer, '/');
- if (lex_match_id ("FILE") || token == T_STRING)
+ if (lex_match_id (lexer, "FILE") || lex_token (lexer) == T_STRING) /* A bare string token is accepted without the FILE keyword. */
{
- lex_match ('=');
+ lex_match (lexer, '=');
- fh = fh_parse (FH_REF_FILE | FH_REF_SCRATCH);
+ fh = fh_parse (lexer, FH_REF_FILE | FH_REF_SCRATCH);
if (fh == NULL)
goto error;
}
- else if (type == IMPORT_CMD && lex_match_id ("TYPE"))
+ else if (type == IMPORT_CMD && lex_match_id (lexer, "TYPE"))
{
- lex_match ('=');
+ lex_match (lexer, '=');
- if (lex_match_id ("COMM"))
+ if (lex_match_id (lexer, "COMM"))
type = PFM_COMM; /* NOTE(review): stores a PFM_* value in TYPE, an enum reader_command that the loop re-tests against IMPORT_CMD -- confirm this is intended. */
- else if (lex_match_id ("TAPE"))
+ else if (lex_match_id (lexer, "TAPE"))
type = PFM_TAPE; /* NOTE(review): same concern as for PFM_COMM above. */
else
{
- lex_error (_("expecting COMM or TAPE"));
+ lex_error (lexer, _("expecting COMM or TAPE"));
goto error;
}
}
else
- break;
+ break;
}
-
- if (fh == NULL)
+
+ if (fh == NULL)
{
- lex_sbc_missing ("FILE");
+ lex_sbc_missing (lexer, "FILE");
goto error;
}
-
- discard_variables (current_dataset);
-
- pgm = xmalloc (sizeof *pgm);
- pgm->reader = any_reader_open (fh, &dict);
- pgm->map = NULL;
- case_nullify (&pgm->bounce);
- if (pgm->reader == NULL)
+
+ reader = any_reader_open (fh, &dict);
+ if (reader == NULL)
goto error;
- case_create (&pgm->bounce, dict_get_next_value_idx (dict));
-
start_case_map (dict); /* Paired with finish_case_map below, after DICT has been trimmed. */
- while (token != '.')
+ while (lex_token (lexer) != '.')
{
- lex_match ('/');
- if (!parse_dict_trim (dict))
+ lex_match (lexer, '/');
+ if (!parse_dict_trim (lexer, dict))
goto error;
}
- pgm->map = finish_case_map (dict);
-
- dict_destroy (dataset_dict (current_dataset));
- dataset_set_dict (current_dataset, dict);
+ map = finish_case_map (dict);
+ if (map != NULL) /* With a null map the reader is used unwrapped. */
+ reader = casereader_create_translator (reader,
+ dict_get_next_value_idx (dict),
+ get_translate_case,
+ get_destroy_case_map,
+ map);
- proc_set_source (current_dataset,
- create_case_source (&case_reader_source_class, pgm));
+ proc_set_active_file (ds, reader, dict);
return CMD_SUCCESS;
error:
- case_reader_pgm_free (pgm);
+ casereader_destroy (reader); /* READER is still null if the failure came before any_reader_open succeeded. */
if (dict != NULL)
dict_destroy (dict);
return CMD_CASCADING_FAILURE;
}
-/* Frees a struct case_reader_pgm. */
-static void
-case_reader_pgm_free (struct case_reader_pgm *pgm)
-{
- if (pgm != NULL)
- {
- any_reader_close (pgm->reader);
- destroy_case_map (pgm->map);
- case_destroy (&pgm->bounce);
- free (pgm);
- }
-}
-
-/* Clears internal state related to case reader input procedure. */
/* Translation callback handed to casereader_create_translator and
   casewriter_create_translator in this file.  MAP_ is the
   struct case_map supplied as auxiliary data; INPUT is mapped into
   OUTPUT with map_case. */
static void
-case_reader_source_destroy (struct case_source *source)
+get_translate_case (const struct ccase *input, struct ccase *output,
+ void *map_)
{
- struct case_reader_pgm *pgm = source->aux;
- case_reader_pgm_free (pgm);
+ struct case_map *map = map_;
+ map_case (map, input, output);
}
-/* Reads all the cases from the data file into C and passes them
- to WRITE_CASE one by one, passing WC_DATA.
- Returns true if successful, false if an I/O error occurred. */
/* Destroy callback registered alongside get_translate_case.  MAP_
   is the struct case_map supplied as auxiliary data; it is released
   with destroy_case_map.  Always returns true. */
static bool
-case_reader_source_read (struct case_source *source,
- struct ccase *c,
- write_case_func *write_case, write_case_data wc_data)
+get_destroy_case_map (void *map_)
{
- struct case_reader_pgm *pgm = source->aux;
- bool ok = true;
-
- do
- {
- bool got_case;
- if (pgm->map == NULL)
- got_case = any_reader_read (pgm->reader, c);
- else
- {
- got_case = any_reader_read (pgm->reader, &pgm->bounce);
- if (got_case)
- map_case (pgm->map, &pgm->bounce, c);
- }
- if (!got_case)
- break;
-
- ok = write_case (wc_data);
- }
- while (ok);
-
- return ok && !any_reader_error (pgm->reader);
+ struct case_map *map = map_;
+ destroy_case_map (map);
+ return true;
}
-
-static const struct case_source_class case_reader_source_class =
- {
- "case reader",
- NULL,
- case_reader_source_read,
- case_reader_source_destroy,
- };
\f
/* GET.
   Command entry point: forwards LEXER and DS to parse_read_command
   with GET_CMD and returns its result. */
int
-cmd_get (void)
+cmd_get (struct lexer *lexer, struct dataset *ds)
{
- return parse_read_command (GET_CMD);
+ return parse_read_command (lexer, ds, GET_CMD);
}
/* IMPORT.
   Command entry point: forwards LEXER and DS to parse_read_command
   with IMPORT_CMD and returns its result. */
int
-cmd_import (void)
+cmd_import (struct lexer *lexer, struct dataset *ds)
{
- return parse_read_command (IMPORT_CMD);
+ return parse_read_command (lexer, ds, IMPORT_CMD);
}
\f
-/* Writing system and portable files. */
+/* Writing system and portable files. */
/* Type of output file. */
enum writer_type
};
/* Type of a command, as passed to parse_write_command.  The
   UNSELECTED subcommand is accepted only for PROC_CMD. */
-enum command_type
+enum command_type
{
XFORM_CMD, /* Transformation (used by parse_output_trns). */
PROC_CMD /* Procedure (used by parse_output_proc). */
};
-/* File writer plus a case map. */
-struct case_writer
- {
- struct any_writer *writer; /* File writer. */
- struct case_map *map; /* Map to output file dictionary
- (null pointer for identity mapping). */
- struct ccase bounce; /* Bounce buffer for mapping (if needed). */
- };
-
-/* Destroys AW. */
-static bool
-case_writer_destroy (struct case_writer *aw)
-{
- bool ok = true;
- if (aw != NULL)
- {
- ok = any_writer_close (aw->writer);
- destroy_case_map (aw->map);
- case_destroy (&aw->bounce);
- free (aw);
- }
- return ok;
-}
-
/* Parses SAVE or XSAVE or EXPORT or XEXPORT command.
WRITER_TYPE identifies the type of file to write,
and COMMAND_TYPE identifies the type of command.
included.
On failure, returns a null pointer. */
-static struct case_writer *
-parse_write_command (enum writer_type writer_type,
+static struct casewriter *
+parse_write_command (struct lexer *lexer, struct dataset *ds,
+ enum writer_type writer_type,
enum command_type command_type,
bool *retain_unselected)
{
/* Common data. */
struct file_handle *handle; /* Output file. */
struct dictionary *dict; /* Dictionary for output file. */
- struct case_writer *aw; /* Writer. */
+ struct casewriter *writer; /* Writer. */
+ struct case_map *map; /* Map from input data to data for writer. */
/* Common options. */
bool print_map; /* Print map? TODO. */
*retain_unselected = true;
handle = NULL;
- dict = dict_clone (dataset_dict (current_dataset));
- aw = xmalloc (sizeof *aw);
- aw->writer = NULL;
- aw->map = NULL;
- case_nullify (&aw->bounce);
+ dict = dict_clone (dataset_dict (ds));
+ writer = NULL;
+ map = NULL;
print_map = false;
print_short_names = false;
sysfile_opts = sfm_writer_default_options ();
start_case_map (dict);
dict_delete_scratch_vars (dict);
- lex_match ('/');
+ lex_match (lexer, '/');
for (;;)
{
- if (lex_match_id ("OUTFILE"))
+ if (lex_match_id (lexer, "OUTFILE"))
{
- if (handle != NULL)
+ if (handle != NULL)
{
lex_sbc_only_once ("OUTFILE");
- goto error;
+ goto error;
}
-
- lex_match ('=');
-
- handle = fh_parse (FH_REF_FILE | FH_REF_SCRATCH);
+
+ lex_match (lexer, '=');
+
+ handle = fh_parse (lexer, FH_REF_FILE | FH_REF_SCRATCH);
if (handle == NULL)
goto error;
}
- else if (lex_match_id ("NAMES"))
+ else if (lex_match_id (lexer, "NAMES"))
print_short_names = true;
- else if (lex_match_id ("PERMISSIONS"))
+ else if (lex_match_id (lexer, "PERMISSIONS"))
{
bool cw;
-
- lex_match ('=');
- if (lex_match_id ("READONLY"))
+
+ lex_match (lexer, '=');
+ if (lex_match_id (lexer, "READONLY"))
cw = false;
- else if (lex_match_id ("WRITEABLE"))
+ else if (lex_match_id (lexer, "WRITEABLE"))
cw = true;
else
{
- lex_error (_("expecting %s or %s"), "READONLY", "WRITEABLE");
+ lex_error (lexer, _("expecting %s or %s"), "READONLY", "WRITEABLE");
goto error;
}
sysfile_opts.create_writeable = porfile_opts.create_writeable = cw;
}
- else if (command_type == PROC_CMD && lex_match_id ("UNSELECTED"))
+ else if (command_type == PROC_CMD && lex_match_id (lexer, "UNSELECTED"))
{
- lex_match ('=');
- if (lex_match_id ("RETAIN"))
+ lex_match (lexer, '=');
+ if (lex_match_id (lexer, "RETAIN"))
*retain_unselected = true;
- else if (lex_match_id ("DELETE"))
+ else if (lex_match_id (lexer, "DELETE"))
*retain_unselected = false;
else
{
- lex_error (_("expecting %s or %s"), "RETAIN", "DELETE");
+ lex_error (lexer, _("expecting %s or %s"), "RETAIN", "DELETE");
goto error;
}
}
- else if (writer_type == SYSFILE_WRITER && lex_match_id ("COMPRESSED"))
+ else if (writer_type == SYSFILE_WRITER && lex_match_id (lexer, "COMPRESSED"))
sysfile_opts.compress = true;
- else if (writer_type == SYSFILE_WRITER && lex_match_id ("UNCOMPRESSED"))
+ else if (writer_type == SYSFILE_WRITER && lex_match_id (lexer, "UNCOMPRESSED"))
sysfile_opts.compress = false;
- else if (writer_type == SYSFILE_WRITER && lex_match_id ("VERSION"))
+ else if (writer_type == SYSFILE_WRITER && lex_match_id (lexer, "VERSION"))
{
- lex_match ('=');
- if (!lex_force_int ())
+ lex_match (lexer, '=');
+ if (!lex_force_int (lexer))
goto error;
- sysfile_opts.version = lex_integer ();
- lex_get ();
+ sysfile_opts.version = lex_integer (lexer);
+ lex_get (lexer);
}
- else if (writer_type == PORFILE_WRITER && lex_match_id ("TYPE"))
+ else if (writer_type == PORFILE_WRITER && lex_match_id (lexer, "TYPE"))
{
- lex_match ('=');
- if (lex_match_id ("COMMUNICATIONS"))
+ lex_match (lexer, '=');
+ if (lex_match_id (lexer, "COMMUNICATIONS"))
porfile_opts.type = PFM_COMM;
- else if (lex_match_id ("TAPE"))
+ else if (lex_match_id (lexer, "TAPE"))
porfile_opts.type = PFM_TAPE;
else
{
- lex_error (_("expecting %s or %s"), "COMM", "TAPE");
+ lex_error (lexer, _("expecting %s or %s"), "COMM", "TAPE");
goto error;
}
}
- else if (writer_type == PORFILE_WRITER && lex_match_id ("DIGITS"))
+ else if (writer_type == PORFILE_WRITER && lex_match_id (lexer, "DIGITS"))
{
- lex_match ('=');
- if (!lex_force_int ())
+ lex_match (lexer, '=');
+ if (!lex_force_int (lexer))
goto error;
- porfile_opts.digits = lex_integer ();
- lex_get ();
+ porfile_opts.digits = lex_integer (lexer);
+ lex_get (lexer);
}
- else if (!parse_dict_trim (dict))
+ else if (!parse_dict_trim (lexer, dict))
goto error;
-
- if (!lex_match ('/'))
+
+ if (!lex_match (lexer, '/'))
break;
}
- if (lex_end_of_command () != CMD_SUCCESS)
+ if (lex_end_of_command (lexer) != CMD_SUCCESS)
goto error;
- if (handle == NULL)
+ if (handle == NULL)
{
- lex_sbc_missing ("OUTFILE");
+ lex_sbc_missing (lexer, "OUTFILE");
goto error;
}
dict_compact_values (dict);
- aw->map = finish_case_map (dict);
- if (aw->map != NULL)
- case_create (&aw->bounce, dict_get_next_value_idx (dict));
- if (fh_get_referent (handle) == FH_REF_FILE)
+ if (fh_get_referent (handle) == FH_REF_FILE)
{
- switch (writer_type)
+ switch (writer_type)
{
case SYSFILE_WRITER:
- aw->writer = any_writer_from_sfm_writer (
- sfm_open_writer (handle, dict, sysfile_opts));
+ writer = sfm_open_writer (handle, dict, sysfile_opts);
break;
case PORFILE_WRITER:
- aw->writer = any_writer_from_pfm_writer (
- pfm_open_writer (handle, dict, porfile_opts));
+ writer = pfm_open_writer (handle, dict, porfile_opts);
break;
}
}
else
- aw->writer = any_writer_open (handle, dict);
- if (aw->writer == NULL)
+ writer = any_writer_open (handle, dict);
+ if (writer == NULL)
goto error;
+
+ map = finish_case_map (dict);
+ if (map != NULL)
+ writer = casewriter_create_translator (writer,
+ get_translate_case,
+ get_destroy_case_map,
+ map);
dict_destroy (dict);
-
- return aw;
+
+ return writer;
error:
- case_writer_destroy (aw);
+ casewriter_destroy (writer);
dict_destroy (dict);
+ destroy_case_map (map);
return NULL;
}
-
-/* Writes case C to writer AW. */
-static bool
-case_writer_write_case (struct case_writer *aw, const struct ccase *c)
-{
- if (aw->map != NULL)
- {
- map_case (aw->map, c, &aw->bounce);
- c = &aw->bounce;
- }
- return any_writer_write (aw->writer, c);
-}
\f
/* SAVE and EXPORT. */
-static bool output_proc (const struct ccase *, void *);
-
/* Parses and performs the SAVE or EXPORT procedure.

   Obtains a writer from parse_write_command (as a PROC_CMD),
   transfers the cases read through proc_open into it, then destroys
   the writer and commits the procedure.  The dictionary's filter
   variable is saved beforehand and restored afterwards.  Returns
   CMD_SUCCESS if both the writer teardown and proc_commit succeed,
   otherwise CMD_CASCADING_FAILURE. */
static int
-parse_output_proc (enum writer_type writer_type)
+parse_output_proc (struct lexer *lexer, struct dataset *ds, enum writer_type writer_type)
{
bool retain_unselected;
struct variable *saved_filter_variable;
- struct case_writer *aw;
+ struct casewriter *output;
bool ok;
- aw = parse_write_command (writer_type, PROC_CMD, &retain_unselected);
- if (aw == NULL)
+ output = parse_write_command (lexer, ds, writer_type, PROC_CMD,
+ &retain_unselected);
+ if (output == NULL)
return CMD_CASCADING_FAILURE;
- saved_filter_variable = dict_get_filter (dataset_dict (current_dataset));
- if (retain_unselected)
- dict_set_filter (dataset_dict (current_dataset), NULL);
- ok = procedure (current_dataset,output_proc, aw);
- dict_set_filter (dataset_dict (current_dataset), saved_filter_variable);
+ saved_filter_variable = dict_get_filter (dataset_dict (ds));
+ if (retain_unselected)
+ dict_set_filter (dataset_dict (ds), NULL); /* Presumably so that filtered-out cases still reach OUTPUT -- confirm. */
- case_writer_destroy (aw);
- return ok ? CMD_SUCCESS : CMD_CASCADING_FAILURE;
-}
+ casereader_transfer (proc_open (ds), output);
+ ok = casewriter_destroy (output);
+ ok = proc_commit (ds) && ok; /* proc_commit is evaluated first, so it runs even when the writer reported failure. */
-/* Writes case C to file. */
-static bool
-output_proc (const struct ccase *c, void *aw_)
-{
- struct case_writer *aw = aw_;
- return case_writer_write_case (aw, c);
+ dict_set_filter (dataset_dict (ds), saved_filter_variable); /* Restore the filter saved above. */
+
+ return ok ? CMD_SUCCESS : CMD_CASCADING_FAILURE;
}
/* SAVE command entry point: runs parse_output_proc with
   SYSFILE_WRITER and returns its result. */
int
-cmd_save (void)
+cmd_save (struct lexer *lexer, struct dataset *ds)
{
- return parse_output_proc (SYSFILE_WRITER);
+ return parse_output_proc (lexer, ds, SYSFILE_WRITER);
}
/* EXPORT command entry point: runs parse_output_proc with
   PORFILE_WRITER and returns its result. */
int
-cmd_export (void)
+cmd_export (struct lexer *lexer, struct dataset *ds)
{
- return parse_output_proc (PORFILE_WRITER);
+ return parse_output_proc (lexer, ds, PORFILE_WRITER);
}
\f
/* XSAVE and XEXPORT. */
/* Transformation.
   State for an XSAVE or XEXPORT transformation: allocated in
   parse_output_trns, used by output_trns_proc, and released by
   output_trns_free. */
-struct output_trns
+struct output_trns
{
- struct case_writer *aw; /* Writer. */
+ struct casewriter *writer; /* Writer obtained from parse_write_command. */
};
static trns_proc_func output_trns_proc;
/* Parses the XSAVE or XEXPORT transformation command.

   Allocates a struct output_trns, fills in its writer from
   parse_write_command (as an XFORM_CMD), and registers
   output_trns_proc / output_trns_free as a transformation on DS.
   Returns CMD_SUCCESS, or CMD_CASCADING_FAILURE (after freeing the
   allocation) if no writer could be created. */
static int
-parse_output_trns (enum writer_type writer_type)
+parse_output_trns (struct lexer *lexer, struct dataset *ds, enum writer_type writer_type)
{
struct output_trns *t = xmalloc (sizeof *t);
- t->aw = parse_write_command (writer_type, XFORM_CMD, NULL);
- if (t->aw == NULL)
+ t->writer = parse_write_command (lexer, ds, writer_type, XFORM_CMD, NULL); /* NOTE(review): NULL is passed for RETAIN_UNSELECTED -- confirm parse_write_command never dereferences it for XFORM_CMD. */
+ if (t->writer == NULL)
{
free (t);
return CMD_CASCADING_FAILURE;
}
- add_transformation (current_dataset, output_trns_proc, output_trns_free, t);
+ add_transformation (ds, output_trns_proc, output_trns_free, t);
return CMD_SUCCESS;
}
/* Writes case C to the system file specified on XSAVE or XEXPORT.
   TRNS_ is the struct output_trns registered by parse_output_trns.
   A clone of C, not C itself, is handed to casewriter_write.  The
   result of casewriter_write is not examined here; the function
   always returns TRNS_CONTINUE. */
static int
-output_trns_proc (void *trns_, struct ccase *c, casenum_t case_num UNUSED)
+output_trns_proc (void *trns_, struct ccase *c, casenumber case_num UNUSED)
{
struct output_trns *t = trns_;
- case_writer_write_case (t->aw, c);
+ struct ccase tmp;
+ case_clone (&tmp, c); /* Presumably because casewriter_write takes ownership of the case it is given -- confirm. */
+ casewriter_write (t->writer, &tmp);
return TRNS_CONTINUE;
}
output_trns_free (void *trns_)
{
struct output_trns *t = trns_;
- bool ok = true;
-
- if (t != NULL)
- {
- ok = case_writer_destroy (t->aw);
- free (t);
- }
+ bool ok = casewriter_destroy (t->writer);
+ free (t);
return ok;
}
/* XSAVE command.
   Entry point: runs parse_output_trns with SYSFILE_WRITER and
   returns its result. */
int
-cmd_xsave (void)
+cmd_xsave (struct lexer *lexer, struct dataset *ds)
{
- return parse_output_trns (SYSFILE_WRITER);
+ return parse_output_trns (lexer, ds, SYSFILE_WRITER);
}
/* XEXPORT command.
   Entry point: runs parse_output_trns with PORFILE_WRITER and
   returns its result. */
int
-cmd_xexport (void)
+cmd_xexport (struct lexer *lexer, struct dataset *ds)
{
- return parse_output_trns (PORFILE_WRITER);
+ return parse_output_trns (lexer, ds, PORFILE_WRITER);
}
\f
-static bool rename_variables (struct dictionary *dict);
-static bool drop_variables (struct dictionary *dict);
-static bool keep_variables (struct dictionary *dict);
+static bool rename_variables (struct lexer *lexer, struct dictionary *dict);
+static bool drop_variables (struct lexer *, struct dictionary *dict);
+static bool keep_variables (struct lexer *, struct dictionary *dict);
/* Commands that read and write system files share a great deal
of common syntactic structure for rearranging and dropping
variables. This function parses this syntax and modifies DICT
appropriately. Returns true on success, false on failure.

Exactly one subcommand is consumed from LEXER per call: MAP, DROP,
KEEP, or RENAME.  DROP, KEEP, and RENAME are delegated to
drop_variables, keep_variables, and rename_variables; any other
token is reported through lex_error. */
static bool
-parse_dict_trim (struct dictionary *dict)
+parse_dict_trim (struct lexer *lexer, struct dictionary *dict)
{
- if (lex_match_id ("MAP"))
+ if (lex_match_id (lexer, "MAP"))
{
/* FIXME.  The MAP keyword is consumed but has no other effect. */
return true;
}
- else if (lex_match_id ("DROP"))
- return drop_variables (dict);
- else if (lex_match_id ("KEEP"))
- return keep_variables (dict);
- else if (lex_match_id ("RENAME"))
- return rename_variables (dict);
+ else if (lex_match_id (lexer, "DROP"))
+ return drop_variables (lexer, dict);
+ else if (lex_match_id (lexer, "KEEP"))
+ return keep_variables (lexer, dict);
+ else if (lex_match_id (lexer, "RENAME"))
+ return rename_variables (lexer, dict);
else
{
- lex_error (_("expecting a valid subcommand"));
+ lex_error (lexer, _("expecting a valid subcommand"));
return false;
}
}
/* Parses and performs the RENAME subcommand of GET and SAVE. */
static bool
-rename_variables (struct dictionary *dict)
+rename_variables (struct lexer *lexer, struct dictionary *dict)
{
size_t i;
int group;
- lex_match ('=');
- if (token != '(')
+ lex_match (lexer, '=');
+ if (lex_token (lexer) != '(')
{
struct variable *v;
- v = parse_dict_variable (dict);
+ v = parse_variable (lexer, dict);
if (v == NULL)
return 0;
- if (!lex_force_match ('=')
- || !lex_force_id ())
+ if (!lex_force_match (lexer, '=')
+ || !lex_force_id (lexer))
return 0;
- if (dict_lookup_var (dict, tokid) != NULL)
+ if (dict_lookup_var (dict, lex_tokid (lexer)) != NULL)
{
msg (SE, _("Cannot rename %s as %s because there already exists "
"a variable named %s. To rename variables with "
"overlapping names, use a single RENAME subcommand "
"such as \"/RENAME (A=B)(B=C)(C=A)\", or equivalently, "
- "\"/RENAME (A B C=B C A)\"."), v->name, tokid, tokid);
+ "\"/RENAME (A B C=B C A)\"."),
+ var_get_name (v), lex_tokid (lexer), lex_tokid (lexer));
return 0;
}
-
- dict_rename_var (dict, v, tokid);
- lex_get ();
+
+ dict_rename_var (dict, v, lex_tokid (lexer));
+ lex_get (lexer);
return 1;
}
v = NULL;
new_names = 0;
group = 1;
- while (lex_match ('('))
+ while (lex_match (lexer, '('))
{
size_t old_nv = nv;
- if (!parse_variables (dict, &v, &nv, PV_NO_DUPLICATE | PV_APPEND))
+ if (!parse_variables (lexer, dict, &v, &nv, PV_NO_DUPLICATE | PV_APPEND))
goto done;
- if (!lex_match ('='))
+ if (!lex_match (lexer, '='))
{
msg (SE, _("`=' expected after variable list."));
goto done;
}
- if (!parse_DATA_LIST_vars (&new_names, &nn, PV_APPEND | PV_NO_SCRATCH))
+ if (!parse_DATA_LIST_vars (lexer, &new_names, &nn, PV_APPEND | PV_NO_SCRATCH))
goto done;
if (nn != nv)
{
(unsigned) (nv - old_nv), (unsigned) (nn - old_nv), group);
goto done;
}
- if (!lex_force_match (')'))
+ if (!lex_force_match (lexer, ')'))
goto done;
group++;
}
- if (!dict_rename_vars (dict, v, new_names, nv, &err_name))
+ if (!dict_rename_vars (dict, v, new_names, nv, &err_name))
{
msg (SE, _("Requested renaming duplicates variable name %s."), err_name);
goto done;
/* Parses and performs the DROP subcommand of GET and SAVE.
Returns true if successful, false on failure. */
static bool
-drop_variables (struct dictionary *dict)
+drop_variables (struct lexer *lexer, struct dictionary *dict)
{
struct variable **v;
size_t nv;
- lex_match ('=');
- if (!parse_variables (dict, &v, &nv, PV_NONE))
+ lex_match (lexer, '=');
+ if (!parse_variables (lexer, dict, &v, &nv, PV_NONE))
return false;
dict_delete_vars (dict, v, nv);
free (v);
/* Parses and performs the KEEP subcommand of GET and SAVE.
Returns true if successful, false on failure. */
static bool
-keep_variables (struct dictionary *dict)
+keep_variables (struct lexer *lexer, struct dictionary *dict)
{
struct variable **v;
size_t nv;
size_t i;
- lex_match ('=');
- if (!parse_variables (dict, &v, &nv, PV_NONE))
+ lex_match (lexer, '=');
+ if (!parse_variables (lexer, dict, &v, &nv, PV_NONE))
return false;
/* Move the specified variables to the beginning. */
dict_reorder_vars (dict, v, nv);
-
+
/* Delete the remaining variables. */
v = xnrealloc (v, dict_get_var_cnt (dict) - nv, sizeof *v);
for (i = nv; i < dict_get_var_cnt (dict); i++)
/* MATCH FILES. */
/* File types.
   Stored in the "type" member of struct mtf_file. */
-enum
+enum mtf_type
{
MTF_FILE, /* Specified on FILE= subcommand. */
MTF_TABLE /* Specified on TABLE= subcommand. */
};
-/* One of the files on MATCH FILES. */
+/* One of the FILEs or TABLEs on MATCH FILES. */
struct mtf_file
{
- struct mtf_file *next, *prev; /* Next, previous in the list of files. */
- struct mtf_file *next_min; /* Next in the chain of minimums. */
-
- int type; /* One of MTF_*. */
- struct variable **by; /* List of BY variables for this file. */
- struct file_handle *handle; /* File handle. */
- struct any_reader *reader; /* File reader. */
- struct dictionary *dict; /* Dictionary from system file. */
+ struct ll ll; /* In list of all files and tables. */
+
+ enum mtf_type type; /* MTF_FILE or MTF_TABLE. */
+ int sequence; /* NOTE(review): purpose not evident from the visible code. */
+
+ const struct variable **by; /* List of BY variables for this file. */
+ struct mtf_variable *vars; /* Variables to copy to output. */
+ size_t var_cnt; /* Number of other variables. */
+
+ struct file_handle *handle; /* Input file handle (null for the active file). */
+ struct dictionary *dict; /* Input file dictionary. */
+ struct casereader *reader; /* Input reader. */
+ struct ccase input; /* Input record (null at end of file). */
/* IN subcommand. */
char *in_name; /* Variable name. */
struct variable *in_var; /* Variable (in master dictionary). */
+ };
- struct ccase input; /* Input record. */
+struct mtf_variable /* Element type of the "vars" array in struct mtf_file. */
+ {
+ struct variable *in_var; /* Presumably the variable in the input file's dictionary -- confirm. */
+ struct variable *out_var; /* Presumably the matching variable in the output dictionary -- confirm. */
};
/* MATCH FILES procedure.
   Members other than "ok" are initialized one by one at the top of
   cmd_match_files. */
-struct mtf_proc
+struct mtf_proc
{
- struct mtf_file *head; /* First file mentioned on FILE or TABLE. */
- struct mtf_file *tail; /* Last file mentioned on FILE or TABLE. */
+ struct ll_list files; /* List of "struct mtf_file"s; FILEs are inserted ahead of the first TABLE. */
+ int nonempty_files; /* FILEs that are not at end-of-file (incremented per FILE subcommand). */
bool ok; /* False if I/O error occurs. */
- size_t by_cnt; /* Number of variables on BY subcommand. */
-
- /* Names of FIRST, LAST variables. */
- char first[LONG_NAME_LEN + 1], last[LONG_NAME_LEN + 1];
-
struct dictionary *dict; /* Dictionary of output file. */
- struct casefile *output; /* MATCH FILES output. */
- struct ccase mtf_case; /* Case used for output. */
+ struct casewriter *output; /* MATCH FILES output. */
- unsigned seq_num; /* Have we initialized this variable? */
- unsigned *seq_nums; /* Sequence numbers for each var in dict. */
+ size_t by_cnt; /* Number of variables on BY subcommand. */
+
+ /* FIRST, LAST.
+ Only if "first" or "last" is nonnull are the remaining
+ members used. */
+ struct variable *first; /* Variable specified on FIRST (if any). */
+ struct variable *last; /* Variable specified on LAST (if any). */
+ struct ccase buffered_case; /* Case ready for output except that we don't
+ know the value for the LAST variable yet. */
+ struct ccase prev_BY_case; /* Case with values of last set of BY vars. */
+ const struct variable **prev_BY; /* Last set of BY variables. */
};
-static bool mtf_free (struct mtf_proc *);
-static bool mtf_close_file (struct mtf_file *);
-static int mtf_merge_dictionary (struct dictionary *const, struct mtf_file *);
-static bool mtf_delete_file_in_place (struct mtf_proc *, struct mtf_file **);
+static void mtf_free (struct mtf_proc *);
-static bool mtf_read_nonactive_records (void *);
-static bool mtf_processing_finish (void *);
-static bool mtf_processing (const struct ccase *, void *);
+static bool mtf_close_all_files (struct mtf_proc *);
+static bool mtf_merge_dictionary (struct dictionary *const, struct mtf_file *);
+static bool mtf_read_record (struct mtf_proc *mtf, struct mtf_file *);
-static char *var_type_description (struct variable *);
+static void mtf_process_case (struct mtf_proc *);
-static void set_master (struct variable *, struct variable *master);
-static struct variable *get_master (struct variable *);
+static bool create_flag_var (const char *subcommand_name, const char *var_name,
+ struct dictionary *, struct variable **);
+static char *var_type_description (struct variable *);
/* Parse and execute the MATCH FILES command. */
int
-cmd_match_files (void)
+cmd_match_files (struct lexer *lexer, struct dataset *ds)
{
struct mtf_proc mtf;
- struct mtf_file *first_table = NULL;
- struct mtf_file *iter;
-
- bool used_active_file = false;
- bool saw_table = false;
+ struct ll *first_table;
+ struct mtf_file *file, *next;
+
bool saw_in = false;
+ struct casereader *active_file = NULL;
- bool ok;
-
- mtf.head = mtf.tail = NULL;
- mtf.by_cnt = 0;
- mtf.first[0] = '\0';
- mtf.last[0] = '\0';
+ char first_name[LONG_NAME_LEN + 1] = "";
+ char last_name[LONG_NAME_LEN + 1] = "";
+
+ struct taint *taint = NULL;
+
+ size_t i;
+
+ ll_init (&mtf.files);
+ mtf.nonempty_files = 0;
+ first_table = ll_null (&mtf.files);
mtf.dict = dict_create ();
mtf.output = NULL;
- case_nullify (&mtf.mtf_case);
- mtf.seq_num = 0;
- mtf.seq_nums = NULL;
- dict_set_case_limit (mtf.dict, dict_get_case_limit (dataset_dict (current_dataset)));
-
- lex_match ('/');
- while (token == T_ID
- && (lex_id_match ("FILE", tokid) || lex_id_match ("TABLE", tokid)))
- {
- struct mtf_file *file = xmalloc (sizeof *file);
+ mtf.by_cnt = 0;
+ mtf.first = mtf.last = NULL;
+ case_nullify (&mtf.buffered_case);
+ case_nullify (&mtf.prev_BY_case);
+ mtf.prev_BY = NULL;
- if (lex_match_id ("FILE"))
- file->type = MTF_FILE;
- else if (lex_match_id ("TABLE"))
- {
- file->type = MTF_TABLE;
- saw_table = true;
- }
- else
- NOT_REACHED ();
- lex_match ('=');
+ dict_set_case_limit (mtf.dict, dict_get_case_limit (dataset_dict (ds)));
+ lex_match (lexer, '/');
+ while (lex_token (lexer) == T_ID
+ && (lex_id_match (ss_cstr ("FILE"), ss_cstr (lex_tokid (lexer)))
+ || lex_id_match (ss_cstr ("TABLE"), ss_cstr (lex_tokid (lexer)))))
+ {
+ struct mtf_file *file = xmalloc (sizeof *file);
file->by = NULL;
file->handle = NULL;
file->reader = NULL;
file->dict = NULL;
file->in_name = NULL;
file->in_var = NULL;
+ file->var_cnt = 0;
+ file->vars = NULL;
case_nullify (&file->input);
- /* FILEs go first, then TABLEs. */
- if (file->type == MTF_TABLE || first_table == NULL)
+ if (lex_match_id (lexer, "FILE"))
{
- file->next = NULL;
- file->prev = mtf.tail;
- if (mtf.tail)
- mtf.tail->next = file;
- mtf.tail = file;
- if (mtf.head == NULL)
- mtf.head = file;
- if (file->type == MTF_TABLE && first_table == NULL)
- first_table = file;
+ file->type = MTF_FILE;
+ ll_insert (first_table, &file->ll);
+ mtf.nonempty_files++;
}
- else
+ else if (lex_match_id (lexer, "TABLE"))
{
- assert (file->type == MTF_FILE);
- file->next = first_table;
- file->prev = first_table->prev;
- if (first_table->prev)
- first_table->prev->next = file;
- else
- mtf.head = file;
- first_table->prev = file;
+ file->type = MTF_TABLE;
+ ll_push_tail (&mtf.files, &file->ll);
+ if (first_table == ll_null (&mtf.files))
+ first_table = &file->ll;
}
+ else
+ NOT_REACHED ();
+ lex_match (lexer, '=');
- if (lex_match ('*'))
+ if (lex_match (lexer, '*'))
{
- file->handle = NULL;
- file->reader = NULL;
-
- if (used_active_file)
- {
- msg (SE, _("The active file may not be specified more "
- "than once."));
- goto error;
- }
- used_active_file = true;
-
- if (!proc_has_source (current_dataset))
+ if (!proc_has_active_file (ds))
{
msg (SE, _("Cannot specify the active file since no active "
"file has been defined."));
goto error;
}
- if (proc_make_temporary_transformations_permanent (current_dataset))
+ if (proc_make_temporary_transformations_permanent (ds))
msg (SE,
_("MATCH FILES may not be used after TEMPORARY when "
"the active file is an input source. "
"Temporary transformations will be made permanent."));
- file->dict = dataset_dict (current_dataset);
+ file->dict = dict_clone (dataset_dict (ds));
}
else
{
- file->handle = fh_parse (FH_REF_FILE | FH_REF_SCRATCH);
+ file->handle = fh_parse (lexer, FH_REF_FILE | FH_REF_SCRATCH);
if (file->handle == NULL)
goto error;
file->reader = any_reader_open (file->handle, &file->dict);
if (file->reader == NULL)
goto error;
-
- case_create (&file->input, dict_get_next_value_idx (file->dict));
}
- while (lex_match ('/'))
- if (lex_match_id ("RENAME"))
+ while (lex_match (lexer, '/'))
+ if (lex_match_id (lexer, "RENAME"))
{
- if (!rename_variables (file->dict))
- goto error;
+ if (!rename_variables (lexer, file->dict))
+ goto error;
}
- else if (lex_match_id ("IN"))
+ else if (lex_match_id (lexer, "IN"))
{
- lex_match ('=');
- if (token != T_ID)
+ lex_match (lexer, '=');
+ if (lex_token (lexer) != T_ID)
{
- lex_error (NULL);
+ lex_error (lexer, NULL);
goto error;
}
"TABLE."));
goto error;
}
- file->in_name = xstrdup (tokid);
- lex_get ();
+ file->in_name = xstrdup (lex_tokid (lexer));
+ lex_get (lexer);
saw_in = true;
}
mtf_merge_dictionary (mtf.dict, file);
}
-
- while (token != '.')
+
+ while (lex_token (lexer) != '.')
{
- if (lex_match (T_BY))
+ if (lex_match (lexer, T_BY))
{
+ struct mtf_file *file;
struct variable **by;
-
+ bool ok;
+
if (mtf.by_cnt)
{
- msg (SE, _("BY may appear at most once."));
+ lex_sbc_only_once ("BY");
goto error;
}
-
- lex_match ('=');
- if (!parse_variables (mtf.dict, &by, &mtf.by_cnt,
+
+ lex_match (lexer, '=');
+ if (!parse_variables (lexer, mtf.dict, &by, &mtf.by_cnt,
PV_NO_DUPLICATE | PV_NO_SCRATCH))
goto error;
- for (iter = mtf.head; iter != NULL; iter = iter->next)
+ ok = true;
+ ll_for_each (file, struct mtf_file, ll, &mtf.files)
{
size_t i;
-
- iter->by = xnmalloc (mtf.by_cnt, sizeof *iter->by);
+ file->by = xnmalloc (mtf.by_cnt, sizeof *file->by);
for (i = 0; i < mtf.by_cnt; i++)
{
- iter->by[i] = dict_lookup_var (iter->dict, by[i]->name);
- if (iter->by[i] == NULL)
+ const char *var_name = var_get_name (by[i]);
+ file->by[i] = dict_lookup_var (file->dict, var_name);
+ if (file->by[i] == NULL)
{
- msg (SE, _("File %s lacks BY variable %s."),
- iter->handle ? fh_get_name (iter->handle) : "*",
- by[i]->name);
- free (by);
- goto error;
+ if (file->handle != NULL)
+ msg (SE, _("File %s lacks BY variable %s."),
+ fh_get_name (file->handle), var_name);
+ else
+ msg (SE, _("Active file lacks BY variable %s."),
+ var_name);
+ ok = false;
}
}
}
free (by);
+
+ if (!ok)
+ goto error;
}
- else if (lex_match_id ("FIRST"))
+ else if (lex_match_id (lexer, "FIRST"))
{
- if (mtf.first[0] != '\0')
+ if (first_name[0] != '\0')
{
- msg (SE, _("FIRST may appear at most once."));
+ lex_sbc_only_once ("FIRST");
goto error;
}
-
- lex_match ('=');
- if (!lex_force_id ())
+
+ lex_match (lexer, '=');
+ if (!lex_force_id (lexer))
goto error;
- strcpy (mtf.first, tokid);
- lex_get ();
+ strcpy (first_name, lex_tokid (lexer));
+ lex_get (lexer);
}
- else if (lex_match_id ("LAST"))
+ else if (lex_match_id (lexer, "LAST"))
{
- if (mtf.last[0] != '\0')
+ if (last_name[0] != '\0')
{
- msg (SE, _("LAST may appear at most once."));
+ lex_sbc_only_once ("LAST");
goto error;
}
-
- lex_match ('=');
- if (!lex_force_id ())
+
+ lex_match (lexer, '=');
+ if (!lex_force_id (lexer))
goto error;
- strcpy (mtf.last, tokid);
- lex_get ();
+ strcpy (last_name, lex_tokid (lexer));
+ lex_get (lexer);
}
- else if (lex_match_id ("MAP"))
+ else if (lex_match_id (lexer, "MAP"))
{
/* FIXME. */
}
- else if (lex_match_id ("DROP"))
+ else if (lex_match_id (lexer, "DROP"))
{
- if (!drop_variables (mtf.dict))
+ if (!drop_variables (lexer, mtf.dict))
goto error;
}
- else if (lex_match_id ("KEEP"))
+ else if (lex_match_id (lexer, "KEEP"))
{
- if (!keep_variables (mtf.dict))
+ if (!keep_variables (lexer, mtf.dict))
goto error;
}
else
{
- lex_error (NULL);
+ lex_error (lexer, NULL);
goto error;
}
- if (!lex_match ('/') && token != '.')
+ if (!lex_match (lexer, '/') && lex_token (lexer) != '.')
{
- lex_end_of_command ();
+ lex_end_of_command (lexer);
goto error;
}
}
if (mtf.by_cnt == 0)
{
- if (saw_table)
+ if (first_table != ll_null (&mtf.files))
{
msg (SE, _("BY is required when TABLE is specified."));
goto error;
/* Set up mapping from each file's variables to master
variables. */
- for (iter = mtf.head; iter != NULL; iter = iter->next)
+ ll_for_each (file, struct mtf_file, ll, &mtf.files)
{
- struct dictionary *d = iter->dict;
- int i;
+ size_t in_var_cnt = dict_get_var_cnt (file->dict);
- for (i = 0; i < dict_get_var_cnt (d); i++)
+ file->vars = xnmalloc (in_var_cnt, sizeof *file->vars);
+ file->var_cnt = 0;
+ for (i = 0; i < in_var_cnt; i++)
{
- struct variable *v = dict_get_var (d, i);
- struct variable *mv = dict_lookup_var (mtf.dict, v->name);
- if (mv != NULL)
- set_master (v, mv);
+ struct variable *in_var = dict_get_var (file->dict, i);
+ struct variable *out_var = dict_lookup_var (mtf.dict,
+ var_get_name (in_var));
+
+ if (out_var != NULL)
+ {
+ struct mtf_variable *mv = &file->vars[file->var_cnt++];
+ mv->in_var = in_var;
+ mv->out_var = out_var;
+ }
}
}
- /* Add IN variables to master dictionary. */
- for (iter = mtf.head; iter != NULL; iter = iter->next)
- if (iter->in_name != NULL)
- {
- iter->in_var = dict_create_var (mtf.dict, iter->in_name, 0);
- if (iter->in_var == NULL)
- {
- msg (SE, _("IN variable name %s duplicates an "
- "existing variable name."),
- iter->in_var->name);
- goto error;
- }
- iter->in_var->print = iter->in_var->write
- = make_output_format (FMT_F, 1, 0);
- }
-
- /* MATCH FILES performs an n-way merge on all its input files.
- Abstract algorithm:
-
- 1. Read one input record from every input FILE.
-
- 2. If no FILEs are left, stop. Otherwise, proceed to step 3.
-
- 3. Find the FILE input record(s) that have minimum BY
- values. Store all the values from these input records into
- the output record.
-
- 4. For every TABLE, read another record as long as the BY values
- on the TABLE's input record are less than the FILEs' BY values.
- If an exact match is found, store all the values from the TABLE
- input record into the output record.
-
- 5. Write the output record.
-
- 6. Read another record from each input file FILE and TABLE that
- we stored values from above. If we come to the end of one of the
- input files, remove it from the list of input files.
-
- 7. Repeat from step 2.
-
- Unfortunately, this algorithm can't be implemented in a
- straightforward way because there's no function to read a
- record from the active file. Instead, it has to be written
- as a state machine.
-
- FIXME: For merging large numbers of files (more than 10?) a
- better algorithm would use a heap for finding minimum
- values. */
-
- if (!used_active_file)
- discard_variables (current_dataset);
+ /* Add IN, FIRST, and LAST variables to master dictionary. */
+ ll_for_each (file, struct mtf_file, ll, &mtf.files)
+ if (!create_flag_var ("IN", file->in_name, mtf.dict, &file->in_var))
+ goto error;
+ if (!create_flag_var ("FIRST", first_name, mtf.dict, &mtf.first)
+ || !create_flag_var ("LAST", last_name, mtf.dict, &mtf.last))
+ goto error;
dict_compact_values (mtf.dict);
- mtf.output = fastfile_create (dict_get_next_value_idx (mtf.dict));
- mtf.seq_nums = xcalloc (dict_get_var_cnt (mtf.dict), sizeof *mtf.seq_nums);
- case_create (&mtf.mtf_case, dict_get_next_value_idx (mtf.dict));
-
- if (!mtf_read_nonactive_records (&mtf))
- goto error;
+ mtf.output = autopaging_writer_create (dict_get_next_value_idx (mtf.dict));
+ taint = taint_clone (casewriter_get_taint (mtf.output));
- if (used_active_file)
+ ll_for_each (file, struct mtf_file, ll, &mtf.files)
{
- proc_set_sink (current_dataset,
- create_case_sink (&null_sink_class,
- dataset_dict (current_dataset), NULL));
- ok = procedure (current_dataset,mtf_processing, &mtf) && mtf_processing_finish (&mtf);
+ if (file->reader == NULL)
+ {
+ if (active_file == NULL)
+ {
+ proc_discard_output (ds);
+ file->reader = active_file = proc_open (ds);
+ }
+ else
+ file->reader = casereader_clone (active_file);
+ }
+ taint_propagate (casereader_get_taint (file->reader), taint);
}
- else
- ok = mtf_processing_finish (&mtf);
- discard_variables (current_dataset);
+ ll_for_each_safe (file, next, struct mtf_file, ll, &mtf.files)
+ mtf_read_record (&mtf, file);
+ while (mtf.nonempty_files > 0)
+ mtf_process_case (&mtf);
+ if ((mtf.first != NULL || mtf.last != NULL) && mtf.prev_BY != NULL)
+ {
+ if (mtf.last != NULL)
+ case_data_rw (&mtf.buffered_case, mtf.last)->f = 1.0;
+ casewriter_write (mtf.output, &mtf.buffered_case);
+ case_nullify (&mtf.buffered_case);
+ }
+ mtf_close_all_files (&mtf);
+ if (active_file != NULL)
+ proc_commit (ds);
- dict_destroy (dataset_dict (current_dataset));
- dataset_set_dict (current_dataset, mtf.dict);
+ proc_set_active_file (ds, casewriter_make_reader (mtf.output), mtf.dict);
mtf.dict = NULL;
- proc_set_source (current_dataset, storage_source_create (mtf.output));
mtf.output = NULL;
-
- if (!mtf_free (&mtf))
- ok = false;
- return ok ? CMD_SUCCESS : CMD_CASCADING_FAILURE;
-
+
+ mtf_free (&mtf);
+
+ return taint_destroy (taint) ? CMD_SUCCESS : CMD_CASCADING_FAILURE;
+
error:
+ if (active_file != NULL)
+ proc_commit (ds);
mtf_free (&mtf);
+ taint_destroy (taint);
return CMD_CASCADING_FAILURE;
}
-/* Repeats 2...7 an arbitrary number of times. */
+/* If VAR_NAME is a nonnull pointer to a non-empty string,
+ attempts to create a variable named VAR_NAME, with format
+ F1.0, in DICT, and stores a pointer to the variable in *VAR.
+ Returns true if successful, false if the variable name is a
+ duplicate (in which case a message saying that the variable
+ specified on the given SUBCOMMAND is a duplicate is emitted).
+ If VAR_NAME is null or empty, creates nothing, stores a null
+ pointer in *VAR, and returns true. */
static bool
-mtf_processing_finish (void *mtf_)
+create_flag_var (const char *subcommand, const char *var_name,
+ struct dictionary *dict, struct variable **var)
{
- struct mtf_proc *mtf = mtf_;
- struct mtf_file *iter;
-
- /* Find the active file and delete it. */
- for (iter = mtf->head; iter; iter = iter->next)
- if (iter->handle == NULL)
- {
- if (!mtf_delete_file_in_place (mtf, &iter))
- NOT_REACHED ();
- break;
- }
-
- while (mtf->head && mtf->head->type == MTF_FILE)
- if (!mtf_processing (NULL, mtf))
- return false;
-
+ if (var_name != NULL && var_name[0] != '\0')
+ {
+ struct fmt_spec format = fmt_for_output (FMT_F, 1, 0);
+ *var = dict_create_var (dict, var_name, 0);
+ if (*var == NULL)
+ {
+ /* Argument order must follow the format string: the
+ variable name first, then the subcommand name. */
+ msg (SE, _("Variable name %s specified on %s subcommand "
+ "duplicates an existing variable name."),
+ var_name, subcommand);
+ return false;
+ }
+ var_set_both_formats (*var, &format);
+ }
+ else
+ *var = NULL;
return true;
}
-/* Return a string in a static buffer describing V's variable type and
- width. */
+/* Returns a newly allocated string describing V's variable type
+ and width. The caller is responsible for freeing it. */
static char *
var_type_description (struct variable *v)
{
- static char buf[2][32];
- static int x = 0;
- char *s;
-
- x ^= 1;
- s = buf[x];
-
- if (v->type == NUMERIC)
- strcpy (s, "numeric");
+ if (var_is_numeric (v))
+ return xstrdup ("numeric");
else
- {
- assert (v->type == ALPHA);
- sprintf (s, "string with width %d", v->width);
- }
- return s;
+ return xasprintf ("string with width %d", var_get_width (v));
}
-/* Closes FILE and frees its associated data.
- Returns true if successful, false if an I/O error
- occurred on FILE. */
+/* Closes all the files in MTF and frees their associated data.
+ Every file is removed from MTF's list and released even if an
+ earlier one reported an error.
+ Returns true if successful, false if an I/O error occurred on
+ any of the files. */
static bool
-mtf_close_file (struct mtf_file *file)
+mtf_close_all_files (struct mtf_proc *mtf)
{
- bool ok = file->reader == NULL || !any_reader_error (file->reader);
- free (file->by);
- any_reader_close (file->reader);
- if (file->handle != NULL)
- dict_destroy (file->dict);
- case_destroy (&file->input);
- free (file->in_name);
- free (file);
- return ok;
-}
-
-/* Free all the data for the MATCH FILES procedure.
- Returns true if successful, false if an I/O error
- occurred. */
-static bool
-mtf_free (struct mtf_proc *mtf)
-{
- struct mtf_file *iter, *next;
+ struct mtf_file *file;
bool ok = true;
- for (iter = mtf->head; iter; iter = next)
+ ll_for_each_preremove (file, struct mtf_file, ll, &mtf->files)
{
- next = iter->next;
- assert (iter->dict != mtf->dict);
- if (!mtf_close_file (iter))
- ok = false;
+ /* Record a failure reported while destroying the reader, but
+ keep going so that the remaining files are still freed. */
+ if (!casereader_destroy (file->reader))
+ ok = false;
+ free (file->by);
+ dict_destroy (file->dict);
+ free (file->in_name);
+ case_destroy (&file->input);
+ free (file->vars);
+ free (file);
}
-
- if (mtf->dict)
- dict_destroy (mtf->dict);
- case_destroy (&mtf->mtf_case);
- free (mtf->seq_nums);
return ok;
}
-/* Remove *FILE from the mtf_file chain. Make *FILE point to the next
- file in the chain, or to NULL if was the last in the chain.
- Returns true if successful, false if an I/O error occurred. */
-static bool
-mtf_delete_file_in_place (struct mtf_proc *mtf, struct mtf_file **file)
+/* Frees all the data for the MATCH FILES procedure. */
+static void
+mtf_free (struct mtf_proc *mtf)
{
- struct mtf_file *f = *file;
- int i;
-
- if (f->prev)
- f->prev->next = f->next;
- if (f->next)
- f->next->prev = f->prev;
- if (f == mtf->head)
- mtf->head = f->next;
- if (f == mtf->tail)
- mtf->tail = f->prev;
- *file = f->next;
-
- if (f->in_var != NULL)
- case_data_rw (&mtf->mtf_case, f->in_var->fv)->f = 0.;
- for (i = 0; i < dict_get_var_cnt (f->dict); i++)
- {
- struct variable *v = dict_get_var (f->dict, i);
- struct variable *mv = get_master (v);
- if (mv != NULL)
- {
- union value *out = case_data_rw (&mtf->mtf_case, mv->fv);
-
- if (v->type == NUMERIC)
- out->f = SYSMIS;
- else
- memset (out->s, ' ', v->width);
- }
- }
-
- return mtf_close_file (f);
+ mtf_close_all_files (mtf);
+ dict_destroy (mtf->dict);
+ casewriter_destroy (mtf->output);
+ case_destroy (&mtf->buffered_case);
+ case_destroy (&mtf->prev_BY_case);
}
-/* Read a record from every input file except the active file.
- Returns true if successful, false if an I/O error occurred. */
+/* Discards FILE's current input case and reads the next record
+ into it. Returns true if a record was read. Otherwise,
+ decrements MTF's nonempty_files count and returns false. */
static bool
-mtf_read_nonactive_records (void *mtf_)
+mtf_read_record (struct mtf_proc *mtf, struct mtf_file *file)
{
- struct mtf_proc *mtf = mtf_;
- struct mtf_file *iter, *next;
- bool ok = true;
-
- for (iter = mtf->head; ok && iter != NULL; iter = next)
+ case_destroy (&file->input);
+ if (!casereader_read (file->reader, &file->input))
{
- next = iter->next;
- if (iter->handle && !any_reader_read (iter->reader, &iter->input))
- if (!mtf_delete_file_in_place (mtf, &iter))
- ok = false;
+ mtf->nonempty_files--;
+ return false;
}
- return ok;
+ else
+ return true;
}
-/* Compare the BY variables for files A and B; return -1 if A < B, 0
- if A == B, 1 if A > B. */
+/* Compares the BY variables in the current input cases of files
+ A and B; returns -1 if A < B, 0 if A == B, 1 if A > B. (If
+ there are no BY variables, then all records are equal.) */
static inline int
mtf_compare_BY_values (struct mtf_proc *mtf,
- struct mtf_file *a, struct mtf_file *b,
- const struct ccase *c)
+ struct mtf_file *a, struct mtf_file *b)
{
- const struct ccase *ca = case_is_null (&a->input) ? c : &a->input;
- const struct ccase *cb = case_is_null (&b->input) ? c : &b->input;
- assert ((a == NULL) + (b == NULL) + (c == NULL) <= 1);
- return case_compare_2dict (ca, cb, a->by, b->by, mtf->by_cnt);
+ return case_compare_2dict (&a->input, &b->input, a->by, b->by, mtf->by_cnt);
}
-/* Perform one iteration of steps 3...7 above.
- Returns true if successful, false if an I/O error occurred. */
-static bool
-mtf_processing (const struct ccase *c, void *mtf_)
+/* Processes the input files to form one output case, then writes
+ it to the output file (or buffers it until the next call, when
+ FIRST or LAST is in use). */
+static void
+mtf_process_case (struct mtf_proc *mtf)
{
- struct mtf_proc *mtf = mtf_;
+ struct ccase c;
+ struct mtf_file *min;
+ struct mtf_file *file;
+ int min_sequence;
+ size_t i;
- /* Do we need another record from the active file? */
- bool read_active_file;
+ /* Find the set of one or more FILEs whose BY values are
+ minimal, as well as the set of zero or more TABLEs whose BY
+ values equal those of the minimum FILEs.
+
+ After each iteration of the loop, this invariant holds: the
+ FILEs with minimum BY values thus far have "sequence"
+ members equal to min_sequence, and "min" points to one of
+ the mtf_files whose case has those minimum BY values, and
+ similarly for TABLEs. */
+ min_sequence = 0;
+ min = NULL;
+ ll_for_each (file, struct mtf_file, ll, &mtf->files)
+ if (case_is_null (&file->input))
+ file->sequence = -1;
+ else if (file->type == MTF_FILE)
+ {
+ int cmp = min != NULL ? mtf_compare_BY_values (mtf, min, file) : 1;
+ if (cmp <= 0)
+ file->sequence = cmp < 0 ? -1 : min_sequence;
+ else
+ {
+ file->sequence = ++min_sequence;
+ min = file;
+ }
+ }
+ else
+ {
+ int cmp;
+ assert (min != NULL);
+ do
+ {
+ cmp = mtf_compare_BY_values (mtf, min, file);
+ }
+ while (cmp > 0 && mtf_read_record (mtf, file));
+ file->sequence = cmp == 0 ? min_sequence : -1;
+ }
- assert (mtf->head != NULL);
- if (mtf->head->type == MTF_TABLE)
- return true;
-
- do
+ /* Form the output case from the input cases. */
+ case_create (&c, dict_get_next_value_idx (mtf->dict));
+ for (i = 0; i < dict_get_var_cnt (mtf->dict); i++)
+ {
+ struct variable *v = dict_get_var (mtf->dict, i);
+ value_set_missing (case_data_rw (&c, v), var_get_width (v));
+ }
+ ll_for_each_reverse (file, struct mtf_file, ll, &mtf->files)
+ {
+ bool include_file = file->sequence == min_sequence;
+ if (include_file)
+ for (i = 0; i < file->var_cnt; i++)
+ {
+ const struct mtf_variable *mv = &file->vars[i];
+ const union value *in = case_data (&file->input, mv->in_var);
+ union value *out = case_data_rw (&c, mv->out_var);
+ value_copy (out, in, var_get_width (mv->in_var));
+ }
+ if (file->in_var != NULL)
+ case_data_rw (&c, file->in_var)->f = include_file;
+ }
+
+ /* Write the output case. */
+ if (mtf->first == NULL && mtf->last == NULL)
+ {
+ /* With no FIRST or LAST variables, it's trivial. */
+ casewriter_write (mtf->output, &c);
+ }
+ else
{
- struct mtf_file *min_head, *min_tail; /* Files with minimum BY values. */
- struct mtf_file *max_head, *max_tail; /* Files with non-minimum BYs. */
- struct mtf_file *iter, *next;
-
- read_active_file = false;
-
- /* 3. Find the FILE input record(s) that have minimum BY
- values. Store all the values from these input records into
- the output record. */
- min_head = min_tail = mtf->head;
- max_head = max_tail = NULL;
- for (iter = mtf->head->next; iter && iter->type == MTF_FILE;
- iter = iter->next)
+ /* It's harder with LAST, because we can't know whether
+ this case is the last in a group until we've prepared
+ the *next* case also. Thus, we buffer the previous
+ output case until the next one is ready.
+
+ We also have to save a copy of one of the previous input
+ cases, so that we can compare the BY variables. We
+ can't compare the BY variables between the current
+ output case and the saved one because the BY variables
+ might not be in the output (the user is allowed to drop
+ them). */
+ bool new_BY;
+ if (mtf->prev_BY != NULL)
{
- int cmp = mtf_compare_BY_values (mtf, min_head, iter, c);
- if (cmp < 0)
- {
- if (max_head)
- max_tail = max_tail->next_min = iter;
- else
- max_head = max_tail = iter;
- }
- else if (cmp == 0)
- min_tail = min_tail->next_min = iter;
- else /* cmp > 0 */
- {
- if (max_head)
- {
- max_tail->next_min = min_head;
- max_tail = min_tail;
- }
- else
- {
- max_head = min_head;
- max_tail = min_tail;
- }
- min_head = min_tail = iter;
- }
+ new_BY = case_compare_2dict (&min->input, &mtf->prev_BY_case,
+ min->by, mtf->prev_BY,
+ mtf->by_cnt);
+ if (mtf->last != NULL)
+ case_data_rw (&mtf->buffered_case, mtf->last)->f = new_BY;
+ casewriter_write (mtf->output, &mtf->buffered_case);
}
-
- /* 4. For every TABLE, read another record as long as the BY
- values on the TABLE's input record are less than the FILEs'
- BY values. If an exact match is found, store all the values
- from the TABLE input record into the output record. */
- for (; iter != NULL; iter = next)
- {
- assert (iter->type == MTF_TABLE);
-
- next = iter->next;
- for (;;)
- {
- int cmp = mtf_compare_BY_values (mtf, min_head, iter, c);
- if (cmp < 0)
- {
- if (max_head)
- max_tail = max_tail->next_min = iter;
- else
- max_head = max_tail = iter;
- }
- else if (cmp == 0)
- min_tail = min_tail->next_min = iter;
- else /* cmp > 0 */
- {
- if (iter->handle == NULL)
- return true;
- if (any_reader_read (iter->reader, &iter->input))
- continue;
- if (!mtf_delete_file_in_place (mtf, &iter))
- return false;
- }
- break;
- }
- }
-
- /* Next sequence number. */
- mtf->seq_num++;
-
- /* Store data to all the records we are using. */
- if (min_tail)
- min_tail->next_min = NULL;
- for (iter = min_head; iter; iter = iter->next_min)
- {
- int i;
-
- for (i = 0; i < dict_get_var_cnt (iter->dict); i++)
- {
- struct variable *v = dict_get_var (iter->dict, i);
- struct variable *mv = get_master (v);
-
- if (mv != NULL && mtf->seq_nums[mv->index] != mtf->seq_num)
- {
- const struct ccase *record
- = case_is_null (&iter->input) ? c : &iter->input;
- union value *out = case_data_rw (&mtf->mtf_case, mv->fv);
-
- mtf->seq_nums[mv->index] = mtf->seq_num;
- if (v->type == NUMERIC)
- out->f = case_num (record, v->fv);
- else
- memcpy (out->s, case_str (record, v->fv), v->width);
- }
- }
- if (iter->in_var != NULL)
- case_data_rw (&mtf->mtf_case, iter->in_var->fv)->f = 1.;
-
- if (iter->type == MTF_FILE && iter->handle == NULL)
- read_active_file = true;
- }
-
- /* Store missing values to all the records we're not
- using. */
- if (max_tail)
- max_tail->next_min = NULL;
- for (iter = max_head; iter; iter = iter->next_min)
- {
- int i;
-
- for (i = 0; i < dict_get_var_cnt (iter->dict); i++)
- {
- struct variable *v = dict_get_var (iter->dict, i);
- struct variable *mv = get_master (v);
-
- if (mv != NULL && mtf->seq_nums[mv->index] != mtf->seq_num)
- {
- union value *out = case_data_rw (&mtf->mtf_case, mv->fv);
- mtf->seq_nums[mv->index] = mtf->seq_num;
-
- if (v->type == NUMERIC)
- out->f = SYSMIS;
- else
- memset (out->s, ' ', v->width);
- }
- }
- if (iter->in_var != NULL)
- case_data_rw (&mtf->mtf_case, iter->in_var->fv)->f = 0.;
- }
+ else
+ new_BY = true;
- /* 5. Write the output record. */
- casefile_append (mtf->output, &mtf->mtf_case);
+ case_move (&mtf->buffered_case, &c);
+ if (mtf->first != NULL)
+ case_data_rw (&mtf->buffered_case, mtf->first)->f = new_BY;
- /* 6. Read another record from each input file FILE and TABLE
- that we stored values from above. If we come to the end of
- one of the input files, remove it from the list of input
- files. */
- for (iter = min_head; iter && iter->type == MTF_FILE; iter = next)
- {
- next = iter->next_min;
- if (iter->reader != NULL
- && !any_reader_read (iter->reader, &iter->input))
- if (!mtf_delete_file_in_place (mtf, &iter))
- return false;
- }
+ if (new_BY)
+ {
+ mtf->prev_BY = min->by;
+ case_destroy (&mtf->prev_BY_case);
+ case_clone (&mtf->prev_BY_case, &min->input);
+ }
}
- while (!read_active_file
- && mtf->head != NULL && mtf->head->type == MTF_FILE);
- return true;
+ /* Read another record from each input file FILE with minimum
+ values. */
+ ll_for_each (file, struct mtf_file, ll, &mtf->files)
+ if (file->type == MTF_FILE)
+ {
+ if (file->sequence == min_sequence)
+ mtf_read_record (mtf, file);
+ }
+ else
+ break;
}
/* Merge the dictionary for file F into master dictionary M. */
-static int
+static bool
mtf_merge_dictionary (struct dictionary *const m, struct mtf_file *f)
{
struct dictionary *d = f->dict;
d_docs = dict_get_documents (d);
m_docs = dict_get_documents (m);
- if (d_docs != NULL)
+ if (d_docs != NULL)
{
if (m_docs == NULL)
dict_set_documents (m, d_docs);
else
{
- char *new_docs;
- size_t new_len;
-
- new_len = strlen (m_docs) + strlen (d_docs);
- new_docs = xmalloc (new_len + 1);
- strcpy (new_docs, m_docs);
- strcat (new_docs, d_docs);
+ char *new_docs = xasprintf ("%s%s", m_docs, d_docs);
dict_set_documents (m, new_docs);
free (new_docs);
}
}
-
+
for (i = 0; i < dict_get_var_cnt (d); i++)
{
struct variable *dv = dict_get_var (d, i);
- struct variable *mv = dict_lookup_var (m, dv->name);
+ struct variable *mv = dict_lookup_var (m, var_get_name (dv));
- if (dict_class_from_id (dv->name) == DC_SCRATCH)
+ if (dict_class_from_id (var_get_name (dv)) == DC_SCRATCH)
continue;
if (mv != NULL)
{
- if (mv->width != dv->width)
+ if (var_get_width (mv) != var_get_width (dv))
{
+ char *dv_description = var_type_description (dv);
+ char *mv_description = var_type_description (mv);
+ /* F->handle is null when F is the active file, so
+ report it as "*" instead of passing a null handle
+ to fh_get_name. */
msg (SE, _("Variable %s in file %s (%s) has different "
"type or width from the same variable in "
"earlier file (%s)."),
- dv->name, fh_get_name (f->handle),
- var_type_description (dv), var_type_description (mv));
- return 0;
+ var_get_name (dv),
+ f->handle != NULL ? fh_get_name (f->handle) : "*",
+ dv_description, mv_description);
+ free (dv_description);
+ free (mv_description);
+ return false;
}
-
- if (dv->width == mv->width)
+
+ if (var_get_width (dv) == var_get_width (mv))
{
- if (val_labs_count (dv->val_labs)
- && !val_labs_count (mv->val_labs))
- {
- val_labs_destroy (mv->val_labs);
- mv->val_labs = val_labs_copy (dv->val_labs);
- }
- if (!mv_is_empty (&dv->miss) && mv_is_empty (&mv->miss))
- mv_copy (&mv->miss, &dv->miss);
+ if (var_has_value_labels (dv) && !var_has_value_labels (mv))
+ var_set_value_labels (mv, var_get_value_labels (dv));
+ if (var_has_missing_values (dv) && !var_has_missing_values (mv))
+ var_set_missing_values (mv, var_get_missing_values (dv));
}
- if (dv->label && !mv->label)
- mv->label = xstrdup (dv->label);
+ if (var_get_label (dv) && !var_get_label (mv))
+ var_set_label (mv, var_get_label (dv));
}
else
- mv = dict_clone_var_assert (m, dv, dv->name);
+ mv = dict_clone_var_assert (m, dv, var_get_name (dv));
}
- return 1;
-}
-
-/* Marks V's master variable as MASTER. */
-static void
-set_master (struct variable *v, struct variable *master)
-{
- var_attach_aux (v, master, NULL);
-}
-
-/* Returns the master variable corresponding to V,
- as set with set_master(). */
-static struct variable *
-get_master (struct variable *v)
-{
- return v->aux;
+ return true;
}
-\f
-
\f
/* Case map.
Uses D's aux members, which must otherwise not be in use. */
static void
-start_case_map (struct dictionary *d)
+start_case_map (struct dictionary *d)
{
size_t var_cnt = dict_get_var_cnt (d);
size_t i;
-
+
for (i = 0; i < var_cnt; i++)
{
struct variable *v = dict_get_var (d, i);
int *src_fv = xmalloc (sizeof *src_fv);
- *src_fv = v->fv;
+ *src_fv = var_get_case_index (v);
var_attach_aux (v, src_fv, var_dtor_free);
}
}
Returns the new case map, or a null pointer if no mapping is
required (that is, no data has changed position). */
static struct case_map *
-finish_case_map (struct dictionary *d)
+finish_case_map (struct dictionary *d)
{
struct case_map *map;
size_t var_cnt = dict_get_var_cnt (d);
map->map[i] = -1;
identity_map = 1;
- for (i = 0; i < var_cnt; i++)
+ for (i = 0; i < var_cnt; i++)
{
struct variable *v = dict_get_var (d, i);
+ size_t value_cnt = var_get_value_cnt (v);
int *src_fv = (int *) var_detach_aux (v);
size_t idx;
- if (v->fv != *src_fv)
+ if (var_get_case_index (v) != *src_fv)
identity_map = 0;
-
- for (idx = 0; idx < v->nv; idx++)
+
+ for (idx = 0; idx < value_cnt; idx++)
{
int src_idx = *src_fv + idx;
- int dst_idx = v->fv + idx;
-
+ int dst_idx = var_get_case_index (v) + idx;
+
assert (map->map[dst_idx] == -1);
map->map[dst_idx] = src_idx;
}
free (src_fv);
}
- if (identity_map)
+ if (identity_map)
{
destroy_case_map (map);
return NULL;
/* Maps from SRC to DST, applying case map MAP. */
static void
map_case (const struct case_map *map,
- const struct ccase *src, struct ccase *dst)
+ const struct ccase *src, struct ccase *dst)
{
size_t dst_idx;
- assert (map != NULL);
- assert (src != NULL);
- assert (dst != NULL);
- assert (src != dst);
-
+ case_create (dst, map->value_cnt);
for (dst_idx = 0; dst_idx < map->value_cnt; dst_idx++)
{
int src_idx = map->map[dst_idx];
if (src_idx != -1)
- *case_data_rw (dst, dst_idx) = *case_data (src, src_idx);
+ *case_data_rw_idx (dst, dst_idx) = *case_data_idx (src, src_idx);
}
}
/* Destroys case map MAP. */
static void
-destroy_case_map (struct case_map *map)
+destroy_case_map (struct case_map *map)
{
- if (map != NULL)
+ if (map != NULL)
{
free (map->map);
free (map);