02111-1307, USA. */
#include <config.h>
-#include <assert.h>
+#include "error.h"
#include <stdlib.h>
#include "alloc.h"
-#include "avl.h"
+#include "case.h"
#include "command.h"
#include "error.h"
#include "file-handle.h"
+#include "hash.h"
#include "lexer.h"
#include "misc.h"
#include "pfm.h"
#include "settings.h"
#include "sfm.h"
#include "str.h"
+#include "value-labels.h"
#include "var.h"
#include "vfm.h"
#include "vfmP.h"
-#undef DEBUGGING
-/*#define DEBUGGING 1*/
#include "debug-print.h"
+/* GET or IMPORT input program.  The GET parser allocates one of
+   these with xmalloc(), hands it to create_case_source() as the
+   source's aux data, and get_source_destroy() closes the handle
+   and frees the structure. */
+struct get_pgm
+ {
+ struct file_handle *handle; /* File to GET or IMPORT from. */
+ size_t case_size; /* Case size in bytes, from dict_get_case_size(). */
+ };
+
/* XSAVE transformation (and related SAVE, EXPORT procedures). */
struct save_trns
{
struct trns_header h;
struct file_handle *f; /* Associated system file. */
int nvar; /* Number of variables. */
- int *var; /* Indices of variables. */
+ struct variable **var; /* Variables. */
flt64 *case_buf; /* Case transfer buffer. */
};
#define GTSV_OPT_MATCH_FILES 004 /* The MATCH FILES procedure. */
#define GTSV_OPT_NONE 0
-/* The file being read by the input program. */
-static struct file_handle *get_file;
-
-/* The transformation being used by the SAVE procedure. */
-static struct save_trns *trns;
-
static int trim_dictionary (struct dictionary * dict, int *options);
-static int save_write_case_func (struct ccase *);
-static int save_trns_proc (struct trns_header *, struct ccase *);
-static void save_trns_free (struct trns_header *);
-
-#if DEBUGGING
-void dump_dict_variables (struct dictionary *);
-#endif
+static int save_write_case_func (struct ccase *, void *);
+static trns_proc_func save_trns_proc;
+static trns_free_func save_trns_free;
/* Parses the GET command. */
int
{
struct file_handle *handle;
struct dictionary *dict;
+ struct get_pgm *pgm;
int options = GTSV_OPT_NONE;
- int i;
- int nval;
-
- lex_match_id ("GET");
discard_variables ();
lex_match ('/');
if (dict == NULL)
return CMD_FAILURE;
-#if DEBUGGING
- dump_dict_variables (dict);
-#endif
if (0 == trim_dictionary (dict, &options))
{
fh_close_handle (handle);
return CMD_FAILURE;
}
-#if DEBUGGING
- dump_dict_variables (dict);
-#endif
-
- /* Set the fv and lv elements of all variables remaining in the
- dictionary. */
- nval = 0;
- for (i = 0; i < dict->nvar; i++)
- {
- struct variable *v = dict->var[i];
- v->fv = nval;
- nval += v->nv;
- }
- dict->nval = nval;
- assert (nval);
+ dict_compact_values (dict);
-#if DEBUGGING
- printf (_("GET translation table from file to memory:\n"));
- for (i = 0; i < dict->nvar; i++)
- {
- struct variable *v = dict->var[i];
-
- printf (_(" %8s from %3d,%3d to %3d,%3d\n"), v->name,
- v->get.fv, v->get.nv, v->fv, v->nv);
- }
-#endif
+ dict_destroy (default_dict);
+ default_dict = dict;
- restore_dictionary (dict);
-
- vfm_source = &get_source;
- get_file = handle;
+ pgm = xmalloc (sizeof *pgm);
+ pgm->handle = handle;
+ pgm->case_size = dict_get_case_size (default_dict);
+ vfm_source = create_case_source (&get_source_class, default_dict, pgm);
return CMD_SUCCESS;
}
-/* Parses the SAVE (for X==0) and XSAVE (for X==1) commands. */
-/* FIXME: save_dictionary() is too expensive. It would make more
- sense to copy just the first few fields of each variables (up to
- `foo'): that's a SMOP. */
-int
-cmd_save_internal (int x)
+/* SAVE or XSAVE command? */
+enum save_cmd
+ {
+ CMD_SAVE,
+ CMD_XSAVE
+ };
+
+/* Parses the SAVE and XSAVE commands. */
+static int
+cmd_save_internal (enum save_cmd save_cmd)
{
struct file_handle *handle;
struct dictionary *dict;
int i;
- lex_match_id ("SAVE");
-
lex_match ('/');
if (lex_match_id ("OUTFILE"))
lex_match ('=');
if (handle == NULL)
return CMD_FAILURE;
- dict = save_dictionary ();
-#if DEBUGGING
- dump_dict_variables (dict);
-#endif
- for (i = 0; i < dict->nvar; i++)
- dict->var[i]->foo = i;
+ dict = dict_clone (default_dict);
+ for (i = 0; i < dict_get_var_cnt (dict); i++)
+ dict_get_var (dict, i)->aux = dict_get_var (default_dict, i);
if (0 == trim_dictionary (dict, &options))
{
fh_close_handle (handle);
return CMD_FAILURE;
}
-#if DEBUGGING
- dump_dict_variables (dict);
-#endif
-
/* Write dictionary. */
inf.h = handle;
inf.dict = dict;
inf.compress = !!(options & GTSV_OPT_COMPRESSED);
if (!sfm_write_dictionary (&inf))
{
- free_dictionary (dict);
+ dict_destroy (dict);
fh_close_handle (handle);
return CMD_FAILURE;
}
/* Fill in transformation structure. */
- t = trns = xmalloc (sizeof *t);
+ t = xmalloc (sizeof *t);
t->h.proc = save_trns_proc;
t->h.free = save_trns_free;
t->f = handle;
- t->nvar = dict->nvar;
- t->var = xmalloc (sizeof *t->var * dict->nvar);
- for (i = 0; i < dict->nvar; i++)
- t->var[i] = dict->var[i]->foo;
+ t->nvar = dict_get_var_cnt (dict);
+ t->var = xmalloc (sizeof *t->var * t->nvar);
+ for (i = 0; i < t->nvar; i++)
+ t->var[i] = dict_get_var (dict, i)->aux;
t->case_buf = xmalloc (sizeof *t->case_buf * inf.case_size);
- free_dictionary (dict);
+ dict_destroy (dict);
- if (x == 0)
- /* SAVE. */
+ if (save_cmd == CMD_SAVE)
{
- procedure (NULL, save_write_case_func, NULL);
- save_trns_free ((struct trns_header *) t);
+ procedure (save_write_case_func, t);
+ save_trns_free (&t->h);
+ }
+ else
+ {
+ assert (save_cmd == CMD_XSAVE);
+ add_transformation (&t->h);
}
- else
- /* XSAVE. */
- add_transformation ((struct trns_header *) t);
return CMD_SUCCESS;
}
int
cmd_save (void)
{
- return cmd_save_internal (0);
+ return cmd_save_internal (CMD_SAVE);
}
/* Parses the XSAVE transformation command. */
int
cmd_xsave (void)
{
- return cmd_save_internal (1);
-}
-
-static int
-save_write_case_func (struct ccase * c)
-{
- save_trns_proc ((struct trns_header *) trns, c);
- return 1;
+ return cmd_save_internal (CMD_XSAVE);
}
-static int
-save_trns_proc (struct trns_header * t unused, struct ccase * c)
+/* Writes the given C to the file specified by T. */
+static void
+do_write_case (struct save_trns *t, struct ccase *c)
{
- flt64 *p = trns->case_buf;
+ flt64 *p = t->case_buf;
int i;
- for (i = 0; i < trns->nvar; i++)
+ for (i = 0; i < t->nvar; i++)
{
- struct variable *v = default_dict.var[trns->var[i]];
+ struct variable *v = t->var[i];
if (v->type == NUMERIC)
{
- double src = c->data[v->fv].f;
+ double src = case_num (c, v->fv);
if (src == SYSMIS)
*p++ = -FLT64_MAX;
else
}
else
{
- memcpy (p, c->data[v->fv].s, v->width);
+ memcpy (p, case_str (c, v->fv), v->width);
memset (&((char *) p)[v->width], ' ',
REM_RND_UP (v->width, sizeof *p));
p += DIV_RND_UP (v->width, sizeof *p);
}
}
- sfm_write_case (trns->f, trns->case_buf, p - trns->case_buf);
+ sfm_write_case (t->f, t->case_buf, p - t->case_buf);
+}
+
+/* Writes case C to the system file specified on SAVE.  AUX is the
+   struct save_trns that cmd_save_internal() hands to procedure();
+   it is forwarded to do_write_case(), so it must not be marked
+   UNUSED.  Always returns 1. */
+static int
+save_write_case_func (struct ccase *c, void *aux)
+{
+ do_write_case (aux, c);
+ return 1;
+}
+
+/* Writes case C to the system file specified on XSAVE. */
+static int
+save_trns_proc (struct trns_header *h, struct ccase *c, int case_num UNUSED)
+{
+ struct save_trns *t = (struct save_trns *) h;
+ do_write_case (t, c);
return -1;
}
+/* Frees a SAVE transformation. */
static void
save_trns_free (struct trns_header *pt)
{
free (t);
}
-/* Deletes NV variables from DICT, starting at index FIRST. The
- variables must have consecutive indices. The variables are cleared
- and freed. */
-static void
-dict_delete_run (struct dictionary *dict, int first, int nv)
-{
- int i;
-
- for (i = first; i < first + nv; i++)
- {
- clear_variable (dict, dict->var[i]);
- free (dict->var[i]);
- }
- for (i = first; i < dict->nvar - nv; i++)
- {
- dict->var[i] = dict->var[i + nv];
- dict->var[i]->index -= nv;
- }
- dict->nvar -= nv;
-}
-
static int rename_variables (struct dictionary * dict);
/* The GET and SAVE commands have a common structure after the
*OPTIONS, for the GTSV_OPT_SAVE bit, and writes it, for the
GTSV_OPT_COMPRESSED bit. */
/* FIXME: IN, FIRST, LAST, MAP. */
+/* FIXME? Should we call dict_compact_values() on dict as a
+ final step? */
static int
trim_dictionary (struct dictionary *dict, int *options)
{
- if (set_scompression)
+ if (get_scompression())
*options |= GTSV_OPT_COMPRESSED;
if (*options & GTSV_OPT_SAVE)
{
- int i;
-
/* Delete all the scratch variables. */
- for (i = 0; i < dict->nvar; i++)
- {
- int j;
-
- if (dict->var[i]->name[0] != '#')
- continue;
-
- /* Find a run of variables to be deleted. */
- for (j = i + 1; j < dict->nvar; j++)
- if (dict->var[j]->name[0] != '#')
- break;
-
- /* Actually delete 'em. */
- dict_delete_run (dict, i, j - i);
- }
+ struct variable **v;
+ size_t nv;
+ size_t i;
+
+ v = xmalloc (sizeof *v * dict_get_var_cnt (dict));
+ nv = 0;
+ for (i = 0; i < dict_get_var_cnt (dict); i++)
+ if (dict_class_from_id (dict_get_var (dict, i)->name) == DC_SCRATCH)
+ v[nv++] = dict_get_var (dict, i);
+ dict_delete_vars (dict, v, nv);
+ free (v);
}
while ((*options & GTSV_OPT_MATCH_FILES) || lex_match ('/'))
{
struct variable **v;
int nv;
- int i;
lex_match ('=');
if (!parse_variables (dict, &v, &nv, PV_NONE))
return 0;
-
- /* Loop through the variables to delete. */
- for (i = 0; i < nv;)
- {
- int j;
-
- /* Find a run of variables to be deleted. */
- for (j = i + 1; j < nv; j++)
- if (v[j]->index != v[j - 1]->index + 1)
- break;
-
- /* Actually delete 'em. */
- dict_delete_run (dict, v[i]->index, j - i);
- i = j;
- }
+ dict_delete_vars (dict, v, nv);
+ free (v);
}
else if (lex_match_id ("KEEP"))
{
struct variable **v;
int nv;
+ int i;
lex_match ('=');
if (!parse_variables (dict, &v, &nv, PV_NONE))
return 0;
- /* Reorder the dictionary so that the kept variables are at
- the beginning. */
- {
- int i1;
-
- for (i1 = 0; i1 < nv; i1++)
- {
- int i2 = v[i1]->index;
-
- /* Swap variables with indices i1 and i2. */
- struct variable *t = dict->var[i1];
- dict->var[i1] = dict->var[i2];
- dict->var[i2] = t;
- dict->var[i1]->index = i1;
- dict->var[i2]->index = i2;
- }
-
- free (v);
- }
-
- /* Delete all but the first NV variables from the
- dictionary. */
- {
- int i;
- for (i = nv; i < dict->nvar; i++)
- {
- clear_variable (dict, dict->var[i]);
- free (dict->var[i]);
- }
- }
- dict->var = xrealloc (dict->var, sizeof *dict->var * nv);
- dict->nvar = nv;
+ /* Move the specified variables to the beginning. */
+ dict_reorder_vars (dict, v, nv);
+
+ /* Delete the remaining variables. */
+ v = xrealloc (v, (dict_get_var_cnt (dict) - nv) * sizeof *v);
+ for (i = nv; i < dict_get_var_cnt (dict); i++)
+ v[i - nv] = dict_get_var (dict, i);
+ dict_delete_vars (dict, v, dict_get_var_cnt (dict) - nv);
+ free (v);
}
else if (lex_match_id ("RENAME"))
{
return 0;
}
- if (dict->nvar == 0)
+ if (dict_get_var_cnt (dict) == 0)
{
msg (SE, _("All variables deleted from system file dictionary."));
return 0;
struct variable **v;
char **new_names;
int nv, nn;
+ char *err_name;
int group;
return 0;
if (!strncmp (tokid, v->name, 8))
return 1;
- if (is_dict_varname (dict, tokid))
+ if (dict_lookup_var (dict, tokid) != NULL)
{
msg (SE, _("Cannot rename %s as %s because there already exists "
"a variable named %s. To rename variables with "
return 0;
}
- rename_variable (dict, v, tokid);
+ dict_rename_var (dict, v, tokid);
lex_get ();
return 1;
}
int old_nv = nv;
if (!parse_variables (dict, &v, &nv, PV_NO_DUPLICATE | PV_APPEND))
- goto lossage;
+ goto done;
if (!lex_match ('='))
{
msg (SE, _("`=' expected after variable list."));
- goto lossage;
+ goto done;
}
if (!parse_DATA_LIST_vars (&new_names, &nn, PV_APPEND | PV_NO_SCRATCH))
- goto lossage;
+ goto done;
if (nn != nv)
{
- msg (SE, _("Number of variables on left side of `=' (%d) do not "
+ msg (SE, _("Number of variables on left side of `=' (%d) does not "
"match number of variables on right side (%d), in "
"parenthesized group %d of RENAME subcommand."),
nv - old_nv, nn - old_nv, group);
- goto lossage;
+ goto done;
}
if (!lex_force_match (')'))
- goto lossage;
+ goto done;
group++;
}
- for (i = 0; i < nv; i++)
- avl_force_delete (dict->var_by_name, v[i]);
- for (i = 0; i < nv; i++)
+ if (!dict_rename_vars (dict, v, new_names, nv, &err_name))
{
- strcpy (v[i]->name, new_names[i]);
- if (NULL != avl_insert (dict->var_by_name, v[i]))
- {
- msg (SE, _("Duplicate variables name %s."), v[i]->name);
- goto lossage;
- }
+ msg (SE, _("Requested renaming duplicates variable name %s."), err_name);
+ goto done;
}
success = 1;
-lossage:
- /* The label is a bit of a misnomer, we actually come here on any
- sort of return. */
+done:
for (i = 0; i < nn; i++)
free (new_names[i]);
free (new_names);
return success;
}
-
-#if DEBUGGING
-void
-dump_dict_variables (struct dictionary * dict)
-{
- int i;
-
- printf (_("\nVariables in dictionary:\n"));
- for (i = 0; i < dict->nvar; i++)
- printf ("%s, ", dict->var[i]->name);
- printf ("\n");
-}
-#endif
\f
/* Clears internal state related to GET input procedure. */
static void
-get_source_destroy_source (void)
+get_source_destroy (struct case_source *source)
{
+ struct get_pgm *pgm = source->aux;
+
/* It is not necessary to destroy the dictionary because if we get
to this point then the dictionary is default_dict. */
- fh_close_handle (get_file);
+ fh_close_handle (pgm->handle);
+ free (pgm);
}
-/* Reads all the cases from the data file and passes them to
- write_case(). */
+/* Reads all the cases from the data file into C and passes them
+ to WRITE_CASE one by one, passing WC_DATA. */
static void
-get_source_read (void)
+get_source_read (struct case_source *source,
+ struct ccase *c,
+ write_case_func *write_case, write_case_data wc_data)
{
- while (sfm_read_case (get_file, temp_case->data, &default_dict)
- && write_case ())
+ struct get_pgm *pgm = source->aux;
+
+ while (sfm_read_case (pgm->handle, c, default_dict)
+ && write_case (wc_data))
;
- get_source_destroy_source ();
}
-struct case_stream get_source =
+const struct case_source_class get_source_class =
{
+ "GET",
NULL,
get_source_read,
- NULL,
- NULL,
- get_source_destroy_source,
- NULL,
- "GET",
+ get_source_destroy,
};
\f
/* MATCH FILES. */
-#undef DEBUGGING
-/*#define DEBUGGING 1*/
#include "debug-print.h"
/* File types. */
struct dictionary *dict; /* Dictionary from system file. */
char in[9]; /* Name of the variable from IN=. */
char first[9], last[9]; /* Name of the variables from FIRST=, LAST=. */
- union value *input; /* Input record. */
+ struct ccase input; /* Input record. */
};
-/* All the files mentioned on FILE= or TABLE=. */
-static struct mtf_file *mtf_head, *mtf_tail;
+/* MATCH FILES procedure.  Holds the state that used to live in
+   file-scope globals (mtf_head, mtf_by, mtf_master, ...). */
+struct mtf_proc
+ {
+ struct mtf_file *head; /* First file mentioned on FILE or TABLE. */
+ struct mtf_file *tail; /* Last file mentioned on FILE or TABLE. */
+
+ struct variable **by; /* Variables on the BY subcommand. */
+ size_t by_cnt; /* Number of variables on BY subcommand. */
-/* Variables on the BY subcommand. */
-static struct variable **mtf_by;
-static int mtf_n_by;
+ struct dictionary *dict; /* Dictionary of output file. */
+ struct case_sink *sink; /* Sink to receive output. */
+ struct ccase *mtf_case; /* Case used for output. */
-/* Master dictionary. */
-static struct dictionary *mtf_master;
+ unsigned seq_num; /* Current sequence number, bumped once per output case. */
+ unsigned *seq_nums; /* Per master-variable index: seq_num at which it was last stored. */
+ };
-static void mtf_free (void);
-static void mtf_free_file (struct mtf_file *file);
-static int mtf_merge_dictionary (struct mtf_file *f);
-static void mtf_delete_file_in_place (struct mtf_file **file);
+static void mtf_free (struct mtf_proc *);
+static void mtf_free_file (struct mtf_file *);
+static int mtf_merge_dictionary (struct dictionary *const, struct mtf_file *);
+static void mtf_delete_file_in_place (struct mtf_proc *, struct mtf_file **);
-static void mtf_read_nonactive_records (void);
-static void mtf_processing_finish (void);
-static int mtf_processing (struct ccase *);
+static void mtf_read_nonactive_records (void *);
+static void mtf_processing_finish (void *);
+static int mtf_processing (struct ccase *, void *);
static char *var_type_description (struct variable *);
int
cmd_match_files (void)
{
+ struct mtf_proc mtf;
struct mtf_file *first_table = NULL;
int seen = 0;
- lex_match_id ("MATCH");
- lex_match_id ("FILES");
-
- mtf_head = mtf_tail = NULL;
- mtf_by = NULL;
- mtf_n_by = 0;
- mtf_master = new_dictionary (0);
- mtf_master->N = default_dict.N;
+ mtf.head = mtf.tail = NULL;
+ mtf.by = NULL;
+ mtf.by_cnt = 0;
+ mtf.dict = dict_create ();
+ mtf.sink = NULL;
+ mtf.mtf_case = NULL;
+ mtf.seq_num = 0;
+ mtf.seq_nums = NULL;
+ dict_set_case_limit (mtf.dict, dict_get_case_limit (default_dict));
do
{
seen |= 1;
lex_match ('=');
- if (!parse_variables (mtf_master, &mtf_by, &mtf_n_by,
+ if (!parse_variables (mtf.dict, &mtf.by, &mtf.by_cnt,
PV_NO_DUPLICATE | PV_NO_SCRATCH))
goto lossage;
}
file->in[0] = file->first[0] = file->last[0] = '\0';
file->dict = NULL;
file->by = NULL;
- file->input = NULL;
+ case_nullify (&file->input);
if (lex_match_id ("FILE"))
file->type = MTF_FILE;
if (file->type == MTF_TABLE || first_table == NULL)
{
file->next = NULL;
- file->prev = mtf_tail;
- if (mtf_tail)
- mtf_tail->next = file;
- mtf_tail = file;
- if (mtf_head == NULL)
- mtf_head = file;
+ file->prev = mtf.tail;
+ if (mtf.tail)
+ mtf.tail->next = file;
+ mtf.tail = file;
+ if (mtf.head == NULL)
+ mtf.head = file;
if (file->type == MTF_TABLE && first_table == NULL)
first_table = file;
}
if (first_table->prev)
first_table->prev->next = file;
else
- mtf_head = file;
+ mtf.head = file;
first_table->prev = file;
}
"file has been defined."));
goto lossage;
}
+
+ if (temporary != 0)
+ {
+ msg (SE,
+ _("MATCH FILES may not be used after TEMPORARY when "
+ "the active file is an input source. "
+ "Temporary transformations will be made permanent."));
+ cancel_temporary ();
+ }
}
else
{
file->dict = sfm_read_dictionary (file->handle, NULL);
if (!file->dict)
goto lossage;
+ case_create (&file->input, dict_get_next_value_idx (file->dict));
}
else
- file->dict = &default_dict;
- if (!mtf_merge_dictionary (file))
+ file->dict = default_dict;
+ if (!mtf_merge_dictionary (mtf.dict, file))
goto lossage;
}
else if (lex_id_match ("IN", tokid)
const char *sbc;
char *name;
- if (mtf_tail == NULL)
+ if (mtf.tail == NULL)
{
msg (SE, _("IN, FIRST, and LAST subcommands may not occur "
"before the first FILE or TABLE."));
if (lex_match_id ("IN"))
{
- name = mtf_tail->in;
+ name = mtf.tail->in;
sbc = "IN";
}
else if (lex_match_id ("FIRST"))
{
- name = mtf_tail->first;
+ name = mtf.tail->first;
sbc = "FIRST";
}
else if (lex_match_id ("LAST"))
{
- name = mtf_tail->last;
+ name = mtf.tail->last;
sbc = "LAST";
}
- else
- assert (0);
+ else
+ {
+ assert (0);
+ abort ();
+ }
lex_match ('=');
if (token != T_ID)
strcpy (name, tokid);
lex_get ();
- if (!create_variable (mtf_master, name, NUMERIC, 0))
+ if (!dict_create_var (mtf.dict, name, 0))
{
msg (SE, _("Duplicate variable name %s while creating %s "
"variable."),
{
int options = GTSV_OPT_MATCH_FILES;
- if (mtf_tail == NULL)
+ if (mtf.tail == NULL)
{
msg (SE, _("RENAME, KEEP, and DROP subcommands may not occur "
"before the first FILE or TABLE."));
goto lossage;
}
- if (!trim_dictionary (mtf_tail->dict, &options))
+ if (!trim_dictionary (mtf.tail->dict, &options))
goto lossage;
}
else if (lex_match_id ("MAP"))
{
struct mtf_file *iter;
- for (iter = mtf_head; iter; iter = iter->next)
+ for (iter = mtf.head; iter; iter = iter->next)
{
int i;
- iter->by = xmalloc (sizeof *iter->by * mtf_n_by);
+ iter->by = xmalloc (sizeof *iter->by * mtf.by_cnt);
- for (i = 0; i < mtf_n_by; i++)
+ for (i = 0; i < mtf.by_cnt; i++)
{
- iter->by[i] = find_dict_variable (iter->dict, mtf_by[i]->name);
+ iter->by[i] = dict_lookup_var (iter->dict, mtf.by[i]->name);
if (iter->by[i] == NULL)
{
msg (SE, _("File %s lacks BY variable %s."),
- iter->handle ? fh_handle_name (iter->handle) : "*",
- mtf_by[i]->name);
+ iter->handle ? handle_get_name (iter->handle) : "*",
+ mtf.by[i]->name);
goto lossage;
}
}
}
}
-#if DEBUGGING
- {
- /* From sfm-read.c. */
- extern void dump_dictionary (struct dictionary *);
-
- dump_dictionary (mtf_master);
- }
-#endif
-
/* MATCH FILES performs an n-way merge on all its input files.
Abstract algorithm:
because there's no function to read a record from the active
file; instead, it has to be done using callbacks.
- FIXME: A better algorithm would use a heap for finding minimum
- values, or replacement selection, as described by Knuth in _Art
- of Computer Programming, Vol. 3_. The SORT CASES procedure does
- this, and perhaps some of its code could be adapted. */
+ FIXME: For merging large numbers of files (more than 10?) a
+ better algorithm would use a heap for finding minimum
+ values. */
if (!(seen & 2))
discard_variables ();
- temporary = 2;
- temp_dict = mtf_master;
- temp_trns = n_trns;
+ mtf.sink = create_case_sink (&storage_sink_class, mtf.dict, NULL);
+
+ mtf.seq_nums = xmalloc (dict_get_var_cnt (mtf.dict)
+ * sizeof *mtf.seq_nums);
+ memset (mtf.seq_nums, 0,
+ dict_get_var_cnt (mtf.dict) * sizeof *mtf.seq_nums);
+ mtf.mtf_case = xmalloc (dict_get_case_size (mtf.dict));
- process_active_file (mtf_read_nonactive_records, mtf_processing,
- mtf_processing_finish);
- mtf_master = NULL;
+ /* Each of these callbacks casts its argument back to
+ struct mtf_proc * and dereferences it right away, so the
+ procedure state must be passed, never NULL. */
+ mtf_read_nonactive_records (&mtf);
+ if (seen & 2)
+ procedure (mtf_processing, &mtf);
+ mtf_processing_finish (&mtf);
+
+ dict_destroy (default_dict);
+ default_dict = mtf.dict;
+ mtf.dict = NULL;
+ vfm_source = mtf.sink->class->make_source (mtf.sink);
+ free_case_sink (mtf.sink);
- mtf_free ();
+ mtf_free (&mtf);
return CMD_SUCCESS;
lossage:
- mtf_free ();
+ mtf_free (&mtf);
return CMD_FAILURE;
}
/* Repeats 2...8 an arbitrary number of times. */
static void
-mtf_processing_finish (void)
+mtf_processing_finish (void *mtf_)
{
+ struct mtf_proc *mtf = mtf_;
+ struct mtf_file *iter;
+
/* Find the active file and delete it. */
- {
- struct mtf_file *iter;
-
- for (iter = mtf_head; iter; iter = iter->next)
- if (iter->handle == NULL)
- {
- mtf_delete_file_in_place (&iter);
- break;
- }
- }
+ for (iter = mtf->head; iter; iter = iter->next)
+ if (iter->handle == NULL)
+ {
+ mtf_delete_file_in_place (mtf, &iter);
+ break;
+ }
- while (mtf_head && mtf_head->type == MTF_FILE)
- if (!mtf_processing (temp_case))
+ while (mtf->head && mtf->head->type == MTF_FILE)
+ if (!mtf_processing (NULL, mtf))
break;
}
mtf_free_file (struct mtf_file *file)
{
fh_close_handle (file->handle);
- if (file->dict && file->dict != &default_dict)
- free_dictionary (file->dict);
+ if (file->dict != NULL && file->dict != default_dict)
+ dict_destroy (file->dict);
free (file->by);
if (file->handle)
- free (file->input);
+ case_destroy (&file->input);
free (file);
}
/* Free all the data for the MATCH FILES procedure. */
static void
-mtf_free (void)
+mtf_free (struct mtf_proc *mtf)
{
struct mtf_file *iter, *next;
- for (iter = mtf_head; iter; iter = next)
+ for (iter = mtf->head; iter; iter = next)
{
next = iter->next;
mtf_free_file (iter);
}
- free (mtf_by);
- if (mtf_master)
- free_dictionary (mtf_master);
+ free (mtf->by);
+ if (mtf->dict)
+ dict_destroy (mtf->dict);
+ /* mtf_case is xmalloc'ed by cmd_match_files() (NULL until then);
+ nothing else visible releases it. */
+ free (mtf->mtf_case);
+ free (mtf->seq_nums);
}
/* Remove *FILE from the mtf_file chain. Make *FILE point to the next
file in the chain, or to NULL if was the last in the chain. */
static void
-mtf_delete_file_in_place (struct mtf_file **file)
+mtf_delete_file_in_place (struct mtf_proc *mtf, struct mtf_file **file)
{
struct mtf_file *f = *file;
f->prev->next = f->next;
if (f->next)
f->next->prev = f->prev;
- if (f == mtf_head)
- mtf_head = f->next;
- if (f == mtf_tail)
- mtf_tail = f->prev;
+ if (f == mtf->head)
+ mtf->head = f->next;
+ if (f == mtf->tail)
+ mtf->tail = f->prev;
*file = f->next;
{
int i;
- for (i = 0; i < f->dict->nvar; i++)
+ for (i = 0; i < dict_get_var_cnt (f->dict); i++)
{
- struct variable *v = f->dict->var[i];
+ struct variable *v = dict_get_var (f->dict, i);
+ union value *out = case_data_rw (mtf->mtf_case, v->p.mtf.master->fv);
if (v->type == NUMERIC)
- compaction_case->data[v->p.mtf.master->fv].f = SYSMIS;
+ out->f = SYSMIS;
else
- memset (compaction_case->data[v->p.mtf.master->fv].s, ' ',
- v->width);
+ memset (out->s, ' ', v->width);
}
}
-
+
mtf_free_file (f);
}
/* Read a record from every input file except the active file.
   MTF_ is the struct mtf_proc for the procedure.  A file whose
   first sfm_read_case() fails is removed from the file list. */
static void
-mtf_read_nonactive_records (void)
+mtf_read_nonactive_records (void *mtf_)
{
+ struct mtf_proc *mtf = mtf_;
struct mtf_file *iter;
- for (iter = mtf_head; iter; )
+ for (iter = mtf->head; iter; )
{
if (iter->handle)
{
- assert (iter->input == NULL);
- iter->input = xmalloc (sizeof *iter->input * iter->dict->nval);
-
- if (!sfm_read_case (iter->handle, iter->input, iter->dict))
- mtf_delete_file_in_place (&iter);
+ if (!sfm_read_case (iter->handle, &iter->input, iter->dict))
+ mtf_delete_file_in_place (mtf, &iter);
else
iter = iter->next;
}
else
- {
- iter->input = temp_case->data;
- iter = iter->next;
- }
+ iter = iter->next;
}
}
/* Compare the BY variables for files A and B; return -1 if A < B, 0
if A == B, 1 if A > B. */
static inline int
-mtf_compare_BY_values (struct mtf_file *a, struct mtf_file *b)
+mtf_compare_BY_values (struct mtf_proc *mtf,
+ struct mtf_file *a, struct mtf_file *b,
+ struct ccase *c)
{
+ struct ccase *a_input, *b_input;
int i;
-
- for (i = 0; i < mtf_n_by; i++)
+
+ assert ((a == NULL) + (b == NULL) + (c == NULL) <= 1);
+ a_input = case_is_null (&a->input) ? c : &a->input;
+ b_input = case_is_null (&b->input) ? c : &b->input;
+ for (i = 0; i < mtf->by_cnt; i++)
{
assert (a->by[i]->type == b->by[i]->type);
assert (a->by[i]->width == b->by[i]->width);
if (a->by[i]->type == NUMERIC)
{
- double af = a->input[a->by[i]->fv].f;
- double bf = b->input[b->by[i]->fv].f;
+ double af = case_num (a_input, a->by[i]->fv);
+ double bf = case_num (b_input, b->by[i]->fv);
if (af < bf)
return -1;
int result;
assert (a->by[i]->type == ALPHA);
- result = memcmp (a->input[a->by[i]->fv].s,
- b->input[b->by[i]->fv].s,
+ result = memcmp (case_str (a_input, a->by[i]->fv),
+ case_str (b_input, b->by[i]->fv),
a->by[i]->width);
if (result < 0)
return -1;
return 0;
}
-/* Used to determine whether we've already initialized this
- variable. */
-static int mtf_seq_no = 0;
-
/* Perform one iteration of steps 3...7 above.  C is the current
   active-file case, used wherever a file's own input case is null;
   mtf_processing_finish() passes NULL for it.  MTF_ is the
   struct mtf_proc for the procedure. */
static int
-mtf_processing (struct ccase *c unused)
+mtf_processing (struct ccase *c, void *mtf_)
{
- /* List of files with minimum BY values. */
- struct mtf_file *min_head, *min_tail;
-
- /* List of files with non-minimum BY values. */
- struct mtf_file *max_head, *max_tail;
-
- /* Iterator. */
- struct mtf_file *iter;
+ struct mtf_proc *mtf = mtf_;
+ struct mtf_file *min_head, *min_tail; /* Files with minimum BY values. */
+ struct mtf_file *max_head, *max_tail; /* Files with non-minimum BY values. */
+ struct mtf_file *iter; /* Iterator. */
for (;;)
{
return because that would cause a record to be skipped. */
int advance = 1;
- if (mtf_head->type == MTF_TABLE)
+ if (mtf->head->type == MTF_TABLE)
return 0;
/* 3. Find the FILE input record with minimum BY values. Store
4. Find all the FILE input records with BY values identical
to the minimums. Store all the values from these input
records into the output record. */
- min_head = min_tail = mtf_head;
+ min_head = min_tail = mtf->head;
max_head = max_tail = NULL;
- for (iter = mtf_head->next; iter && iter->type == MTF_FILE;
+ for (iter = mtf->head->next; iter && iter->type == MTF_FILE;
iter = iter->next)
- switch (mtf_compare_BY_values (min_head, iter))
+ switch (mtf_compare_BY_values (mtf, min_head, iter, c))
{
case -1:
if (max_head)
advance = 0;
again:
- switch (mtf_compare_BY_values (min_head, iter))
+ switch (mtf_compare_BY_values (mtf, min_head, iter, c))
{
case -1:
if (max_head)
case 1:
if (iter->handle == NULL)
return 1;
- if (sfm_read_case (iter->handle, iter->input, iter->dict))
+ if (sfm_read_case (iter->handle, &iter->input, iter->dict))
goto again;
- mtf_delete_file_in_place (&iter);
+ mtf_delete_file_in_place (mtf, &iter);
break;
default:
}
/* Next sequence number. */
- mtf_seq_no++;
+ mtf->seq_num++;
/* Store data to all the records we are using. */
if (min_tail)
{
int i;
- for (i = 0; i < iter->dict->nvar; i++)
+ for (i = 0; i < dict_get_var_cnt (iter->dict); i++)
{
- struct variable *v = iter->dict->var[i];
+ struct variable *v = dict_get_var (iter->dict, i);
+ struct ccase *record;
+ union value *out;
- if (v->p.mtf.master->foo == mtf_seq_no)
+ if (mtf->seq_nums[v->p.mtf.master->index] == mtf->seq_num)
continue;
- v->p.mtf.master->foo = mtf_seq_no;
-
-#if 0
- printf ("%s/%s: dest-fv=%d, src-fv=%d\n",
- fh_handle_name (iter->handle),
- v->name,
- v->p.mtf.master->fv, v->fv);
-#endif
+ mtf->seq_nums[v->p.mtf.master->index] = mtf->seq_num;
+
+ record = case_is_null (&iter->input) ? c : &iter->input;
+
+ assert (v->type == NUMERIC || v->type == ALPHA);
+ out = case_data_rw (mtf->mtf_case, v->p.mtf.master->fv);
if (v->type == NUMERIC)
- compaction_case->data[v->p.mtf.master->fv].f
- = iter->input[v->fv].f;
+ out->f = case_num (record, v->fv);
else
- {
- assert (v->type == ALPHA);
- memcpy (compaction_case->data[v->p.mtf.master->fv].s,
- iter->input[v->fv].s, v->width);
-#if __CHECKER__
- memset (&compaction_case
- ->data[v->p.mtf.master->fv].s[v->width],
- 0, REM_RND_UP (v->width, MAX_SHORT_STRING));
-#endif
- }
+ memcpy (out->s, case_str (record, v->fv), v->width);
}
}
{
int i;
- for (i = 0; i < iter->dict->nvar; i++)
+ for (i = 0; i < dict_get_var_cnt (iter->dict); i++)
{
- struct variable *v = iter->dict->var[i];
+ struct variable *v = dict_get_var (iter->dict, i);
+ union value *out;
- if (v->p.mtf.master->foo == mtf_seq_no)
+ if (mtf->seq_nums[v->p.mtf.master->index] == mtf->seq_num)
continue;
- v->p.mtf.master->foo = mtf_seq_no;
-
-#if 0
- printf ("%s/%s: dest-fv=%d\n",
- fh_handle_name (iter->handle),
- v->name,
- v->p.mtf.master->fv);
-#endif
+ mtf->seq_nums[v->p.mtf.master->index] = mtf->seq_num;
+
+ out = case_data_rw (mtf->mtf_case, v->p.mtf.master->fv);
if (v->type == NUMERIC)
- compaction_case->data[v->p.mtf.master->fv].f = SYSMIS;
+ out->f = SYSMIS;
else
- {
- memset (compaction_case->data[v->p.mtf.master->fv].s, ' ',
- v->width);
-#if __CHECKER__
- memset (&compaction_case
- ->data[v->p.mtf.master->fv].s[v->width],
- 0, REM_RND_UP (v->width, MAX_SHORT_STRING));
-#endif
- }
+ memset (out->s, ' ', v->width);
}
if (iter->handle == NULL)
}
/* 6. Write the output record. */
- process_active_file_output_case ();
+ mtf->sink->class->write (mtf->sink, mtf->mtf_case);
/* 7. Read another record from each input file FILE and TABLE
that we stored values from above. If we come to the end of
if (iter->handle)
{
- assert (iter->input != NULL);
-
- if (!sfm_read_case (iter->handle, iter->input, iter->dict))
- mtf_delete_file_in_place (&iter);
+ if (!sfm_read_case (iter->handle, &iter->input, iter->dict))
+ mtf_delete_file_in_place (mtf, &iter);
}
iter = next;
break;
}
- return (mtf_head && mtf_head->type != MTF_TABLE);
+ return (mtf->head && mtf->head->type != MTF_TABLE);
}
/* Merge the dictionary for file F into the master dictionary
- mtf_master. */
+ M. */
static int
-mtf_merge_dictionary (struct mtf_file *f)
+mtf_merge_dictionary (struct dictionary *const m, struct mtf_file *f)
{
- struct dictionary *const m = mtf_master;
struct dictionary *d = f->dict;
-
- if (d->label && m->label == NULL)
- m->label = xstrdup (d->label);
+ const char *d_docs, *m_docs;
+
+ if (dict_get_label (m) == NULL)
+ dict_set_label (m, dict_get_label (d));
- if (d->documents)
+ d_docs = dict_get_documents (d);
+ m_docs = dict_get_documents (m);
+ if (d_docs != NULL)
{
- m->documents = xrealloc (m->documents,
- 80 * (m->n_documents + d->n_documents));
- memcpy (&m->documents[80 * m->n_documents],
- d->documents, 80 * d->n_documents);
- m->n_documents += d->n_documents;
+ if (m_docs == NULL)
+ dict_set_documents (m, d_docs);
+ else
+ {
+ char *new_docs;
+ size_t new_len;
+
+ new_len = strlen (m_docs) + strlen (d_docs);
+ new_docs = xmalloc (new_len + 1);
+ strcpy (new_docs, m_docs);
+ strcat (new_docs, d_docs);
+ dict_set_documents (m, new_docs);
+ free (new_docs);
+ }
}
-
+
+ dict_compact_values (d);
+
{
int i;
- d->nval = 0;
- for (i = 0; i < d->nvar; i++)
+ for (i = 0; i < dict_get_var_cnt (d); i++)
{
- struct variable *dv = d->var[i];
- struct variable *mv = find_dict_variable (m, dv->name);
+ struct variable *dv = dict_get_var (d, i);
+ struct variable *mv = dict_lookup_var (m, dv->name);
- dv->fv = d->nval;
- d->nval += dv->nv;
-
assert (dv->type == ALPHA || dv->width == 0);
assert (!mv || mv->type == ALPHA || mv->width == 0);
if (mv && dv->width == mv->width)
{
- if (dv->val_lab && !mv->val_lab)
- mv->val_lab = copy_value_labels (dv->val_lab);
- if (dv->miss_type != MISSING_NONE && mv->miss_type == MISSING_NONE)
+ if (val_labs_count (dv->val_labs)
+ && !val_labs_count (mv->val_labs))
+ mv->val_labs = val_labs_copy (dv->val_labs);
+ if (dv->miss_type != MISSING_NONE
+ && mv->miss_type == MISSING_NONE)
copy_missing_values (mv, dv);
}
if (mv && dv->label && !mv->label)
mv->label = xstrdup (dv->label);
- if (!mv)
- {
- mv = force_dup_variable (m, dv, dv->name);
-
- /* Used to make sure we initialize each variable in the
- master dictionary exactly once per case. */
- mv->foo = mtf_seq_no;
- }
+ if (!mv)
+ {
+ mv = dict_clone_var (m, dv, dv->name);
+ assert (mv != NULL);
+ }
else if (mv->width != dv->width)
{
msg (SE, _("Variable %s in file %s (%s) has different "
"type or width from the same variable in "
"earlier file (%s)."),
- dv->name, fh_handle_name (f->handle),
+ dv->name, handle_get_name (f->handle),
var_type_description (dv), var_type_description (mv));
return 0;
}
{
struct file_handle *handle = NULL;
struct dictionary *dict;
+ struct get_pgm *pgm;
int options = GTSV_OPT_NONE;
int type;
- int i;
- int nval;
-
- lex_match_id ("IMPORT");
-
for (;;)
{
lex_match ('/');
if (dict == NULL)
return CMD_FAILURE;
-#if DEBUGGING
- dump_dict_variables (dict);
-#endif
if (0 == trim_dictionary (dict, &options))
{
fh_close_handle (handle);
return CMD_FAILURE;
}
-#if DEBUGGING
- dump_dict_variables (dict);
-#endif
-
- /* Set the fv and lv elements of all variables remaining in the
- dictionary. */
- nval = 0;
- for (i = 0; i < dict->nvar; i++)
- {
- struct variable *v = dict->var[i];
-
- v->fv = nval;
- nval += v->nv;
- }
- dict->nval = nval;
- assert (nval);
-#if DEBUGGING
- printf (_("IMPORT translation table from file to memory:\n"));
- for (i = 0; i < dict->nvar; i++)
- {
- struct variable *v = dict->var[i];
+ dict_compact_values (dict);
- printf (_(" %8s from %3d,%3d to %3d,%3d\n"), v->name,
- v->get.fv, v->get.nv, v->fv, v->nv);
- }
-#endif
+ dict_destroy (default_dict);
+ default_dict = dict;
- restore_dictionary (dict);
-
- vfm_source = &import_source;
- get_file = handle;
+ pgm = xmalloc (sizeof *pgm);
+ pgm->handle = handle;
+ pgm->case_size = dict_get_case_size (default_dict);
+ vfm_source = create_case_source (&import_source_class, default_dict, pgm);
return CMD_SUCCESS;
}
/* Reads cases from the portable file one at a time and passes each to
write_case(), stopping as soon as pfm_read_case() or write_case() returns zero. */
static void
-import_source_read (void)
+import_source_read (struct case_source *source,
+ struct ccase *c,
+ write_case_func *write_case, write_case_data wc_data)
{
- while (pfm_read_case (get_file, temp_case->data, &default_dict)
- && write_case ())
- ;
- get_source_destroy_source ();
+ struct get_pgm *pgm = source->aux; /* Presumably the get_pgm handed to create_case_source() for this class -- confirm. */
+
+ while (pfm_read_case (pgm->handle, c, default_dict)) /* C is the case passed to each read; the dictionary is the global default_dict. */
+ if (!write_case (wc_data)) /* WC_DATA is passed through to the callback untouched. */
+ break; /* write_case() returned zero: stop reading. */
}
-struct case_stream import_source =
+const struct case_source_class import_source_class = /* Case source class used for the IMPORT input program. */
{
+ "IMPORT", /* Presumably the class name -- confirm against struct case_source_class. */
NULL, /* Slot left unset; its meaning comes from the struct declaration, which is not visible here. */
import_source_read, /* Reads cases from the portable file and forwards them. */
- NULL,
- NULL,
- get_source_destroy_source,
- NULL,
- "IMPORT",
+ get_source_destroy, /* Cleanup hook; by its name presumably shared with GET -- confirm. */
};
\f
-static int export_write_case_func (struct ccase *c);
+static int export_write_case_func (struct ccase *c, void *);
/* Parses the EXPORT command. */
/* FIXME: same as cmd_save_internal(). */
int i;
- lex_match_id ("EXPORT");
-
lex_match ('/');
if (lex_match_id ("OUTFILE"))
lex_match ('=');
if (handle == NULL)
return CMD_FAILURE;
- dict = save_dictionary ();
-#if DEBUGGING
- dump_dict_variables (dict);
-#endif
- for (i = 0; i < dict->nvar; i++)
- dict->var[i]->foo = i;
+ dict = dict_clone (default_dict);
+ for (i = 0; i < dict_get_var_cnt (dict); i++)
+ dict_get_var (dict, i)->aux = dict_get_var (default_dict, i);
if (0 == trim_dictionary (dict, &options))
{
fh_close_handle (handle);
return CMD_FAILURE;
}
-#if DEBUGGING
- dump_dict_variables (dict);
-#endif
-
/* Write dictionary. */
if (!pfm_write_dictionary (handle, dict))
{
- free_dictionary (dict);
+ dict_destroy (dict);
fh_close_handle (handle);
return CMD_FAILURE;
}
/* Fill in transformation structure. */
- t = trns = xmalloc (sizeof *t);
+ t = xmalloc (sizeof *t);
t->h.proc = save_trns_proc;
t->h.free = save_trns_free;
t->f = handle;
- t->nvar = dict->nvar;
- t->var = xmalloc (sizeof *t->var * dict->nvar);
- for (i = 0; i < dict->nvar; i++)
- t->var[i] = dict->var[i]->foo;
- t->case_buf = xmalloc (sizeof *t->case_buf * dict->nvar);
- free_dictionary (dict);
+ t->nvar = dict_get_var_cnt (dict);
+ t->var = xmalloc (sizeof *t->var * t->nvar);
+ for (i = 0; i < t->nvar; i++)
+ t->var[i] = dict_get_var (dict, i)->aux;
+ t->case_buf = xmalloc (sizeof *t->case_buf * t->nvar);
+ dict_destroy (dict);
- procedure (NULL, export_write_case_func, NULL);
- save_trns_free ((struct trns_header *) t);
+ procedure (export_write_case_func, t);
+ save_trns_free (&t->h);
return CMD_SUCCESS;
}
+/* Writes case C to the EXPORT file.  AUX is the struct save_trns describing the output; always returns 1. */
static int
-export_write_case_func (struct ccase *c)
+export_write_case_func (struct ccase *c, void *aux)
{
- union value *p = (union value *) trns->case_buf;
+ struct save_trns *t = aux;
+ union value *p = (union value *) t->case_buf; /* Output cursor into the case transfer buffer. */
int i;
- for (i = 0; i < trns->nvar; i++)
+ for (i = 0; i < t->nvar; i++) /* One output slot per variable listed in T. */
{
- struct variable *v = default_dict.var[trns->var[i]];
+ struct variable *v = t->var[i];
if (v->type == NUMERIC)
- *p++ = c->data[v->fv];
+ (*p++).f = case_num (c, v->fv); /* Numeric: store the value itself. */
else
- (*p++).c = c->data[v->fv].s;
+ (*p++).c = (char *) case_str (c, v->fv); /* String: store the pointer returned by case_str(), not a copy. */
}
- printf (".");
- fflush (stdout);
-
- pfm_write_case (trns->f, (union value *) trns->case_buf);
+ pfm_write_case (t->f, (union value *) t->case_buf); /* NOTE(review): result ignored -- if pfm_write_case() reports failure via its return value, it is lost here. */
return 1;
}