X-Git-Url: https://pintos-os.org/cgi-bin/gitweb.cgi?a=blobdiff_plain;f=src%2Fget.c;h=dc16d7a5372328482cd71d210ad9ff6a00872b41;hb=205eaea8e2d95e20baa2c00a495b0ac4f9646372;hp=f4428a5ead4a25d149ca843f9015f4b84f403340;hpb=fcb9e49b2a2d57af7c001ae5d2eda9ac443ba36b;p=pspp-builds.git diff --git a/src/get.c b/src/get.c index f4428a5e..dc16d7a5 100644 --- a/src/get.c +++ b/src/get.c @@ -18,32 +18,41 @@ 02111-1307, USA. */ #include -#include +#include "error.h" #include #include "alloc.h" -#include "avl.h" +#include "case.h" #include "command.h" #include "error.h" #include "file-handle.h" +#include "hash.h" #include "lexer.h" #include "misc.h" #include "pfm.h" #include "settings.h" #include "sfm.h" #include "str.h" +#include "value-labels.h" #include "var.h" #include "vfm.h" #include "vfmP.h" #include "debug-print.h" +/* GET or IMPORT input program. */ +struct get_pgm + { + struct file_handle *handle; /* File to GET or IMPORT from. */ + size_t case_size; /* Case size in bytes. */ + }; + /* XSAVE transformation (and related SAVE, EXPORT procedures). */ struct save_trns { struct trns_header h; struct file_handle *f; /* Associated system file. */ int nvar; /* Number of variables. */ - int *var; /* Indices of variables. */ + struct variable **var; /* Variables. */ flt64 *case_buf; /* Case transfer buffer. */ }; @@ -53,20 +62,10 @@ struct save_trns #define GTSV_OPT_MATCH_FILES 004 /* The MATCH FILES procedure. */ #define GTSV_OPT_NONE 0 -/* The file being read by the input program. */ -static struct file_handle *get_file; - -/* The transformation being used by the SAVE procedure. */ -static struct save_trns *trns; - static int trim_dictionary (struct dictionary * dict, int *options); -static int save_write_case_func (struct ccase *); -static int save_trns_proc (struct trns_header *, struct ccase *); -static void save_trns_free (struct trns_header *); - -#if DEBUGGING -void dump_dict_variables (struct dictionary *); -#endif +static int save_write_case_func (struct ccase *, void *); +static trns_proc_func save_trns_proc; +static trns_free_func save_trns_free; /* Parses the GET command. */ int @@ -74,12 +73,9 @@ cmd_get (void) { struct file_handle *handle; struct dictionary *dict; + struct get_pgm *pgm; int options = GTSV_OPT_NONE; - int i; - int nval; - - lex_match_id ("GET"); discard_variables (); lex_match ('/'); @@ -94,56 +90,35 @@ cmd_get (void) if (dict == NULL) return CMD_FAILURE; -#if DEBUGGING - dump_dict_variables (dict); -#endif if (0 == trim_dictionary (dict, &options)) { fh_close_handle (handle); return CMD_FAILURE; } -#if DEBUGGING - dump_dict_variables (dict); -#endif - - /* Set the fv and lv elements of all variables remaining in the - dictionary. */ - nval = 0; - for (i = 0; i < dict->nvar; i++) - { - struct variable *v = dict->var[i]; - v->fv = nval; - nval += v->nv; - } - dict->nval = nval; - assert (nval); + dict_compact_values (dict); -#if DEBUGGING - printf (_("GET translation table from file to memory:\n")); - for (i = 0; i < dict->nvar; i++) - { - struct variable *v = dict->var[i]; - - printf (_(" %8s from %3d,%3d to %3d,%3d\n"), v->name, - v->get.fv, v->get.nv, v->fv, v->nv); - } -#endif + dict_destroy (default_dict); + default_dict = dict; - restore_dictionary (dict); - - vfm_source = &get_source; - get_file = handle; + pgm = xmalloc (sizeof *pgm); + pgm->handle = handle; + pgm->case_size = dict_get_case_size (default_dict); + vfm_source = create_case_source (&get_source_class, default_dict, pgm); return CMD_SUCCESS; } -/* Parses the SAVE (for X==0) and XSAVE (for X==1) commands. */ -/* FIXME: save_dictionary() is too expensive. It would make more - sense to copy just the first few fields of each variables (up to - `foo'): that's a SMOP. */ -int -cmd_save_internal (int x) +/* SAVE or XSAVE command? */ +enum save_cmd + { + CMD_SAVE, + CMD_XSAVE + }; + +/* Parses the SAVE and XSAVE commands. */ +static int +cmd_save_internal (enum save_cmd save_cmd) { struct file_handle *handle; struct dictionary *dict; @@ -154,8 +129,6 @@ cmd_save_internal (int x) int i; - lex_match_id ("SAVE"); - lex_match ('/'); if (lex_match_id ("OUTFILE")) lex_match ('='); @@ -164,54 +137,48 @@ cmd_save_internal (int x) if (handle == NULL) return CMD_FAILURE; - dict = save_dictionary (); -#if DEBUGGING - dump_dict_variables (dict); -#endif - for (i = 0; i < dict->nvar; i++) - dict->var[i]->foo = i; + dict = dict_clone (default_dict); + for (i = 0; i < dict_get_var_cnt (dict); i++) + dict_get_var (dict, i)->aux = dict_get_var (default_dict, i); if (0 == trim_dictionary (dict, &options)) { fh_close_handle (handle); return CMD_FAILURE; } -#if DEBUGGING - dump_dict_variables (dict); -#endif - /* Write dictionary. */ inf.h = handle; inf.dict = dict; inf.compress = !!(options & GTSV_OPT_COMPRESSED); if (!sfm_write_dictionary (&inf)) { - free_dictionary (dict); + dict_destroy (dict); fh_close_handle (handle); return CMD_FAILURE; } /* Fill in transformation structure. */ - t = trns = xmalloc (sizeof *t); + t = xmalloc (sizeof *t); t->h.proc = save_trns_proc; t->h.free = save_trns_free; t->f = handle; - t->nvar = dict->nvar; - t->var = xmalloc (sizeof *t->var * dict->nvar); - for (i = 0; i < dict->nvar; i++) - t->var[i] = dict->var[i]->foo; + t->nvar = dict_get_var_cnt (dict); + t->var = xmalloc (sizeof *t->var * t->nvar); + for (i = 0; i < t->nvar; i++) + t->var[i] = dict_get_var (dict, i)->aux; t->case_buf = xmalloc (sizeof *t->case_buf * inf.case_size); - free_dictionary (dict); + dict_destroy (dict); - if (x == 0) - /* SAVE. */ + if (save_cmd == CMD_SAVE) { - procedure (NULL, save_write_case_func, NULL); - save_trns_free ((struct trns_header *) t); + procedure (save_write_case_func, t); + save_trns_free (&t->h); + } + else + { + assert (save_cmd == CMD_XSAVE); + add_transformation (&t->h); } - else - /* XSAVE. */ - add_transformation ((struct trns_header *) t); return CMD_SUCCESS; } @@ -220,35 +187,29 @@ cmd_save_internal (int x) int cmd_save (void) { - return cmd_save_internal (0); + return cmd_save_internal (CMD_SAVE); } /* Parses the XSAVE transformation command. */ int cmd_xsave (void) { - return cmd_save_internal (1); -} - -static int -save_write_case_func (struct ccase * c) -{ - save_trns_proc ((struct trns_header *) trns, c); - return 1; + return cmd_save_internal (CMD_XSAVE); } -static int -save_trns_proc (struct trns_header * t unused, struct ccase * c) +/* Writes the given C to the file specified by T. */ +static void +do_write_case (struct save_trns *t, struct ccase *c) { - flt64 *p = trns->case_buf; + flt64 *p = t->case_buf; int i; - for (i = 0; i < trns->nvar; i++) + for (i = 0; i < t->nvar; i++) { - struct variable *v = default_dict.var[trns->var[i]]; + struct variable *v = t->var[i]; if (v->type == NUMERIC) { - double src = c->data[v->fv].f; + double src = case_num (c, v->fv); if (src == SYSMIS) *p++ = -FLT64_MAX; else @@ -256,17 +217,34 @@ save_trns_proc (struct trns_header * t unused, struct ccase * c) } else { - memcpy (p, c->data[v->fv].s, v->width); + memcpy (p, case_str (c, v->fv), v->width); memset (&((char *) p)[v->width], ' ', REM_RND_UP (v->width, sizeof *p)); p += DIV_RND_UP (v->width, sizeof *p); } } - sfm_write_case (trns->f, trns->case_buf, p - trns->case_buf); + sfm_write_case (t->f, t->case_buf, p - t->case_buf); +} + +/* Writes case C to the system file specified on SAVE. */ +static int +save_write_case_func (struct ccase *c, void *aux UNUSED) +{ + do_write_case (aux, c); + return 1; +} + +/* Writes case C to the system file specified on XSAVE. */ +static int +save_trns_proc (struct trns_header *h, struct ccase *c, int case_num UNUSED) +{ + struct save_trns *t = (struct save_trns *) h; + do_write_case (t, c); return -1; } +/* Frees a SAVE transformation. */ static void save_trns_free (struct trns_header *pt) { @@ -278,27 +256,6 @@ save_trns_free (struct trns_header *pt) free (t); } -/* Deletes NV variables from DICT, starting at index FIRST. The - variables must have consecutive indices. The variables are cleared - and freed. */ -static void -dict_delete_run (struct dictionary *dict, int first, int nv) -{ - int i; - - for (i = first; i < first + nv; i++) - { - clear_variable (dict, dict->var[i]); - free (dict->var[i]); - } - for (i = first; i < dict->nvar - nv; i++) - { - dict->var[i] = dict->var[i + nv]; - dict->var[i]->index -= nv; - } - dict->nvar -= nv; -} - static int rename_variables (struct dictionary * dict); /* The GET and SAVE commands have a common structure after the @@ -307,32 +264,28 @@ static int rename_variables (struct dictionary * dict); *OPTIONS, for the GTSV_OPT_SAVE bit, and writes it, for the GTSV_OPT_COMPRESSED bit. */ /* FIXME: IN, FIRST, LAST, MAP. */ +/* FIXME? Should we call dict_compact_values() on dict as a + final step? */ static int trim_dictionary (struct dictionary *dict, int *options) { - if (set_scompression) + if (get_scompression()) *options |= GTSV_OPT_COMPRESSED; if (*options & GTSV_OPT_SAVE) { - int i; - /* Delete all the scratch variables. */ - for (i = 0; i < dict->nvar; i++) - { - int j; - - if (dict->var[i]->name[0] != '#') - continue; - - /* Find a run of variables to be deleted. */ - for (j = i + 1; j < dict->nvar; j++) - if (dict->var[j]->name[0] != '#') - break; - - /* Actually delete 'em. */ - dict_delete_run (dict, i, j - i); - } + struct variable **v; + size_t nv; + size_t i; + + v = xmalloc (sizeof *v * dict_get_var_cnt (dict)); + nv = 0; + for (i = 0; i < dict_get_var_cnt (dict); i++) + if (dict_class_from_id (dict_get_var (dict, i)->name) == DC_SCRATCH) + v[nv++] = dict_get_var (dict, i); + dict_delete_vars (dict, v, nv); + free (v); } while ((*options & GTSV_OPT_MATCH_FILES) || lex_match ('/')) @@ -345,68 +298,32 @@ trim_dictionary (struct dictionary *dict, int *options) { struct variable **v; int nv; - int i; lex_match ('='); if (!parse_variables (dict, &v, &nv, PV_NONE)) return 0; - - /* Loop through the variables to delete. */ - for (i = 0; i < nv;) - { - int j; - - /* Find a run of variables to be deleted. */ - for (j = i + 1; j < nv; j++) - if (v[j]->index != v[j - 1]->index + 1) - break; - - /* Actually delete 'em. */ - dict_delete_run (dict, v[i]->index, j - i); - i = j; - } + dict_delete_vars (dict, v, nv); + free (v); } else if (lex_match_id ("KEEP")) { struct variable **v; int nv; + int i; lex_match ('='); if (!parse_variables (dict, &v, &nv, PV_NONE)) return 0; - /* Reorder the dictionary so that the kept variables are at - the beginning. */ - { - int i1; - - for (i1 = 0; i1 < nv; i1++) - { - int i2 = v[i1]->index; - - /* Swap variables with indices i1 and i2. */ - struct variable *t = dict->var[i1]; - dict->var[i1] = dict->var[i2]; - dict->var[i2] = t; - dict->var[i1]->index = i1; - dict->var[i2]->index = i2; - } - - free (v); - } - - /* Delete all but the first NV variables from the - dictionary. */ - { - int i; - for (i = nv; i < dict->nvar; i++) - { - clear_variable (dict, dict->var[i]); - free (dict->var[i]); - } - } - dict->var = xrealloc (dict->var, sizeof *dict->var * nv); - dict->nvar = nv; + /* Move the specified variables to the beginning. */ + dict_reorder_vars (dict, v, nv); + + /* Delete the remaining variables. */ + v = xrealloc (v, (dict_get_var_cnt (dict) - nv) * sizeof *v); + for (i = nv; i < dict_get_var_cnt (dict); i++) + v[i - nv] = dict_get_var (dict, i); + dict_delete_vars (dict, v, dict_get_var_cnt (dict) - nv); + free (v); } else if (lex_match_id ("RENAME")) { @@ -419,7 +336,7 @@ trim_dictionary (struct dictionary *dict, int *options) return 0; } - if (dict->nvar == 0) + if (dict_get_var_cnt (dict) == 0) { msg (SE, _("All variables deleted from system file dictionary.")); return 0; @@ -449,6 +366,7 @@ rename_variables (struct dictionary * dict) struct variable **v; char **new_names; int nv, nn; + char *err_name; int group; @@ -465,7 +383,7 @@ rename_variables (struct dictionary * dict) return 0; if (!strncmp (tokid, v->name, 8)) return 1; - if (is_dict_varname (dict, tokid)) + if (dict_lookup_var (dict, tokid) != NULL) { msg (SE, _("Cannot rename %s as %s because there already exists " "a variable named %s. To rename variables with " @@ -475,7 +393,7 @@ rename_variables (struct dictionary * dict) return 0; } - rename_variable (dict, v, tokid); + dict_rename_var (dict, v, tokid); lex_get (); return 1; } @@ -489,43 +407,35 @@ rename_variables (struct dictionary * dict) int old_nv = nv; if (!parse_variables (dict, &v, &nv, PV_NO_DUPLICATE | PV_APPEND)) - goto lossage; + goto done; if (!lex_match ('=')) { msg (SE, _("`=' expected after variable list.")); - goto lossage; + goto done; } if (!parse_DATA_LIST_vars (&new_names, &nn, PV_APPEND | PV_NO_SCRATCH)) - goto lossage; + goto done; if (nn != nv) { - msg (SE, _("Number of variables on left side of `=' (%d) do not " + msg (SE, _("Number of variables on left side of `=' (%d) does not " "match number of variables on right side (%d), in " "parenthesized group %d of RENAME subcommand."), nv - old_nv, nn - old_nv, group); - goto lossage; + goto done; } if (!lex_force_match (')')) - goto lossage; + goto done; group++; } - for (i = 0; i < nv; i++) - avl_force_delete (dict->var_by_name, v[i]); - for (i = 0; i < nv; i++) + if (!dict_rename_vars (dict, v, new_names, nv, &err_name)) { - strcpy (v[i]->name, new_names[i]); - if (NULL != avl_insert (dict->var_by_name, v[i])) - { - msg (SE, _("Duplicate variables name %s."), v[i]->name); - goto lossage; - } + msg (SE, _("Requested renaming duplicates variable name %s."), err_name); + goto done; } success = 1; -lossage: - /* The label is a bit of a misnomer, we actually come here on any - sort of return. */ +done: for (i = 0; i < nn; i++) free (new_names[i]); free (new_names); @@ -533,49 +443,39 @@ lossage: return success; } - -#if DEBUGGING -void -dump_dict_variables (struct dictionary * dict) -{ - int i; - - printf (_("\nVariables in dictionary:\n")); - for (i = 0; i < dict->nvar; i++) - printf ("%s, ", dict->var[i]->name); - printf ("\n"); -} -#endif /* Clears internal state related to GET input procedure. */ static void -get_source_destroy_source (void) +get_source_destroy (struct case_source *source) { + struct get_pgm *pgm = source->aux; + /* It is not necessary to destroy the dictionary because if we get to this point then the dictionary is default_dict. */ - fh_close_handle (get_file); + fh_close_handle (pgm->handle); + free (pgm); } -/* Reads all the cases from the data file and passes them to - write_case(). */ +/* Reads all the cases from the data file into C and passes them + to WRITE_CASE one by one, passing WC_DATA. */ static void -get_source_read (void) +get_source_read (struct case_source *source, + struct ccase *c, + write_case_func *write_case, write_case_data wc_data) { - while (sfm_read_case (get_file, temp_case->data, &default_dict) - && write_case ()) + struct get_pgm *pgm = source->aux; + + while (sfm_read_case (pgm->handle, c, default_dict) + && write_case (wc_data)) ; - get_source_destroy_source (); } -struct case_stream get_source = +const struct case_source_class get_source_class = { + "GET", NULL, get_source_read, - NULL, - NULL, - get_source_destroy_source, - NULL, - "GET", + get_source_destroy, }; @@ -603,27 +503,34 @@ struct mtf_file struct dictionary *dict; /* Dictionary from system file. */ char in[9]; /* Name of the variable from IN=. */ char first[9], last[9]; /* Name of the variables from FIRST=, LAST=. */ - union value *input; /* Input record. */ + struct ccase input; /* Input record. */ }; -/* All the files mentioned on FILE= or TABLE=. */ -static struct mtf_file *mtf_head, *mtf_tail; +/* MATCH FILES procedure. */ +struct mtf_proc + { + struct mtf_file *head; /* First file mentioned on FILE or TABLE. */ + struct mtf_file *tail; /* Last file mentioned on FILE or TABLE. */ + + struct variable **by; /* Variables on the BY subcommand. */ + size_t by_cnt; /* Number of variables on BY subcommand. */ -/* Variables on the BY subcommand. */ -static struct variable **mtf_by; -static int mtf_n_by; + struct dictionary *dict; /* Dictionary of output file. */ + struct case_sink *sink; /* Sink to receive output. */ + struct ccase *mtf_case; /* Case used for output. */ -/* Master dictionary. */ -static struct dictionary *mtf_master; + unsigned seq_num; /* Have we initialized this variable? */ + unsigned *seq_nums; /* Sequence numbers for each var in dict. */ + }; -static void mtf_free (void); -static void mtf_free_file (struct mtf_file *file); -static int mtf_merge_dictionary (struct mtf_file *f); -static void mtf_delete_file_in_place (struct mtf_file **file); +static void mtf_free (struct mtf_proc *); +static void mtf_free_file (struct mtf_file *); +static int mtf_merge_dictionary (struct dictionary *const, struct mtf_file *); +static void mtf_delete_file_in_place (struct mtf_proc *, struct mtf_file **); -static void mtf_read_nonactive_records (void); -static void mtf_processing_finish (void); -static int mtf_processing (struct ccase *); +static void mtf_read_nonactive_records (void *); +static void mtf_processing_finish (void *); +static int mtf_processing (struct ccase *, void *); static char *var_type_description (struct variable *); @@ -631,18 +538,20 @@ static char *var_type_description (struct variable *); int cmd_match_files (void) { + struct mtf_proc mtf; struct mtf_file *first_table = NULL; int seen = 0; - lex_match_id ("MATCH"); - lex_match_id ("FILES"); - - mtf_head = mtf_tail = NULL; - mtf_by = NULL; - mtf_n_by = 0; - mtf_master = new_dictionary (0); - mtf_master->N = default_dict.N; + mtf.head = mtf.tail = NULL; + mtf.by = NULL; + mtf.by_cnt = 0; + mtf.dict = dict_create (); + mtf.sink = NULL; + mtf.mtf_case = NULL; + mtf.seq_num = 0; + mtf.seq_nums = NULL; + dict_set_case_limit (mtf.dict, dict_get_case_limit (default_dict)); do { @@ -658,7 +567,7 @@ cmd_match_files (void) seen |= 1; lex_match ('='); - if (!parse_variables (mtf_master, &mtf_by, &mtf_n_by, + if (!parse_variables (mtf.dict, &mtf.by, &mtf.by_cnt, PV_NO_DUPLICATE | PV_NO_SCRATCH)) goto lossage; } @@ -674,7 +583,7 @@ cmd_match_files (void) file->in[0] = file->first[0] = file->last[0] = '\0'; file->dict = NULL; file->by = NULL; - file->input = NULL; + case_nullify (&file->input); if (lex_match_id ("FILE")) file->type = MTF_FILE; @@ -690,12 +599,12 @@ cmd_match_files (void) if (file->type == MTF_TABLE || first_table == NULL) { file->next = NULL; - file->prev = mtf_tail; - if (mtf_tail) - mtf_tail->next = file; - mtf_tail = file; - if (mtf_head == NULL) - mtf_head = file; + file->prev = mtf.tail; + if (mtf.tail) + mtf.tail->next = file; + mtf.tail = file; + if (mtf.head == NULL) + mtf.head = file; if (file->type == MTF_TABLE && first_table == NULL) first_table = file; } @@ -707,7 +616,7 @@ cmd_match_files (void) if (first_table->prev) first_table->prev->next = file; else - mtf_head = file; + mtf.head = file; first_table->prev = file; } @@ -732,6 +641,15 @@ cmd_match_files (void) "file has been defined.")); goto lossage; } + + if (temporary != 0) + { + msg (SE, + _("MATCH FILES may not be used after TEMPORARY when " + "the active file is an input source. " + "Temporary transformations will be made permanent.")); + cancel_temporary (); + } } else { @@ -745,10 +663,11 @@ cmd_match_files (void) file->dict = sfm_read_dictionary (file->handle, NULL); if (!file->dict) goto lossage; + case_create (&file->input, dict_get_next_value_idx (file->dict)); } else - file->dict = &default_dict; - if (!mtf_merge_dictionary (file)) + file->dict = default_dict; + if (!mtf_merge_dictionary (mtf.dict, file)) goto lossage; } else if (lex_id_match ("IN", tokid) @@ -758,7 +677,7 @@ cmd_match_files (void) const char *sbc; char *name; - if (mtf_tail == NULL) + if (mtf.tail == NULL) { msg (SE, _("IN, FIRST, and LAST subcommands may not occur " "before the first FILE or TABLE.")); @@ -767,21 +686,24 @@ cmd_match_files (void) if (lex_match_id ("IN")) { - name = mtf_tail->in; + name = mtf.tail->in; sbc = "IN"; } else if (lex_match_id ("FIRST")) { - name = mtf_tail->first; + name = mtf.tail->first; sbc = "FIRST"; } else if (lex_match_id ("LAST")) { - name = mtf_tail->last; + name = mtf.tail->last; sbc = "LAST"; } - else - assert (0); + else + { + assert (0); + abort (); + } lex_match ('='); if (token != T_ID) @@ -800,7 +722,7 @@ cmd_match_files (void) strcpy (name, tokid); lex_get (); - if (!create_variable (mtf_master, name, NUMERIC, 0)) + if (!dict_create_var (mtf.dict, name, 0)) { msg (SE, _("Duplicate variable name %s while creating %s " "variable."), @@ -814,14 +736,14 @@ cmd_match_files (void) { int options = GTSV_OPT_MATCH_FILES; - if (mtf_tail == NULL) + if (mtf.tail == NULL) { msg (SE, _("RENAME, KEEP, and DROP subcommands may not occur " "before the first FILE or TABLE.")); goto lossage; } - if (!trim_dictionary (mtf_tail->dict, &options)) + if (!trim_dictionary (mtf.tail->dict, &options)) goto lossage; } else if (lex_match_id ("MAP")) @@ -850,35 +772,26 @@ cmd_match_files (void) { struct mtf_file *iter; - for (iter = mtf_head; iter; iter = iter->next) + for (iter = mtf.head; iter; iter = iter->next) { int i; - iter->by = xmalloc (sizeof *iter->by * mtf_n_by); + iter->by = xmalloc (sizeof *iter->by * mtf.by_cnt); - for (i = 0; i < mtf_n_by; i++) + for (i = 0; i < mtf.by_cnt; i++) { - iter->by[i] = find_dict_variable (iter->dict, mtf_by[i]->name); + iter->by[i] = dict_lookup_var (iter->dict, mtf.by[i]->name); if (iter->by[i] == NULL) { msg (SE, _("File %s lacks BY variable %s."), - iter->handle ? fh_handle_name (iter->handle) : "*", - mtf_by[i]->name); + iter->handle ? handle_get_name (iter->handle) : "*", + mtf.by[i]->name); goto lossage; } } } } -#if DEBUGGING - { - /* From sfm-read.c. */ - extern void dump_dictionary (struct dictionary *); - - dump_dictionary (mtf_master); - } -#endif - /* MATCH FILES performs an n-way merge on all its input files. Abstract algorithm: @@ -910,48 +823,57 @@ cmd_match_files (void) because there's no function to read a record from the active file; instead, it has to be done using callbacks. - FIXME: A better algorithm would use a heap for finding minimum - values, or replacement selection, as described by Knuth in _Art - of Computer Programming, Vol. 3_. The SORT CASES procedure does - this, and perhaps some of its code could be adapted. */ + FIXME: For merging large numbers of files (more than 10?) a + better algorithm would use a heap for finding minimum + values. */ if (!(seen & 2)) discard_variables (); - temporary = 2; - temp_dict = mtf_master; - temp_trns = n_trns; + mtf.sink = create_case_sink (&storage_sink_class, mtf.dict, NULL); + + mtf.seq_nums = xmalloc (dict_get_var_cnt (mtf.dict) + * sizeof *mtf.seq_nums); + memset (mtf.seq_nums, 0, + dict_get_var_cnt (mtf.dict) * sizeof *mtf.seq_nums); + mtf.mtf_case = xmalloc (dict_get_case_size (mtf.dict)); + + mtf_read_nonactive_records (NULL); + if (seen & 2) + procedure (mtf_processing, NULL); + mtf_processing_finish (NULL); - process_active_file (mtf_read_nonactive_records, mtf_processing, - mtf_processing_finish); - mtf_master = NULL; + dict_destroy (default_dict); + default_dict = mtf.dict; + mtf.dict = NULL; + vfm_source = mtf.sink->class->make_source (mtf.sink); + free_case_sink (mtf.sink); - mtf_free (); + mtf_free (&mtf); return CMD_SUCCESS; lossage: - mtf_free (); + mtf_free (&mtf); return CMD_FAILURE; } /* Repeats 2...8 an arbitrary number of times. */ static void -mtf_processing_finish (void) +mtf_processing_finish (void *mtf_) { + struct mtf_proc *mtf = mtf_; + struct mtf_file *iter; + /* Find the active file and delete it. */ - { - struct mtf_file *iter; - - for (iter = mtf_head; iter; iter = iter->next) - if (iter->handle == NULL) - { - mtf_delete_file_in_place (&iter); - break; - } - } + for (iter = mtf->head; iter; iter = iter->next) + if (iter->handle == NULL) + { + mtf_delete_file_in_place (mtf, &iter); + break; + } - while (mtf_head && mtf_head->type == MTF_FILE) - if (!mtf_processing (temp_case)) + while (mtf->head && mtf->head->type == MTF_FILE) + if (!mtf_processing (NULL, mtf)) break; } @@ -982,36 +904,37 @@ static void mtf_free_file (struct mtf_file *file) { fh_close_handle (file->handle); - if (file->dict && file->dict != &default_dict) - free_dictionary (file->dict); + if (file->dict != NULL && file->dict != default_dict) + dict_destroy (file->dict); free (file->by); if (file->handle) - free (file->input); + case_destroy (&file->input); free (file); } /* Free all the data for the MATCH FILES procedure. */ static void -mtf_free (void) +mtf_free (struct mtf_proc *mtf) { struct mtf_file *iter, *next; - for (iter = mtf_head; iter; iter = next) + for (iter = mtf->head; iter; iter = next) { next = iter->next; mtf_free_file (iter); } - free (mtf_by); - if (mtf_master) - free_dictionary (mtf_master); + free (mtf->by); + if (mtf->dict) + dict_destroy (mtf->dict); + free (mtf->seq_nums); } /* Remove *FILE from the mtf_file chain. Make *FILE point to the next file in the chain, or to NULL if was the last in the chain. */ static void -mtf_delete_file_in_place (struct mtf_file **file) +mtf_delete_file_in_place (struct mtf_proc *mtf, struct mtf_file **file) { struct mtf_file *f = *file; @@ -1019,72 +942,73 @@ mtf_delete_file_in_place (struct mtf_file **file) f->prev->next = f->next; if (f->next) f->next->prev = f->prev; - if (f == mtf_head) - mtf_head = f->next; - if (f == mtf_tail) - mtf_tail = f->prev; + if (f == mtf->head) + mtf->head = f->next; + if (f == mtf->tail) + mtf->tail = f->prev; *file = f->next; { int i; - for (i = 0; i < f->dict->nvar; i++) + for (i = 0; i < dict_get_var_cnt (f->dict); i++) { - struct variable *v = f->dict->var[i]; + struct variable *v = dict_get_var (f->dict, i); + union value *out = case_data_rw (mtf->mtf_case, v->p.mtf.master->fv); if (v->type == NUMERIC) - compaction_case->data[v->p.mtf.master->fv].f = SYSMIS; + out->f = SYSMIS; else - memset (compaction_case->data[v->p.mtf.master->fv].s, ' ', - v->width); + memset (out->s, ' ', v->width); } } - + mtf_free_file (f); } /* Read a record from every input file except the active file. */ static void -mtf_read_nonactive_records (void) +mtf_read_nonactive_records (void *mtf_ UNUSED) { + struct mtf_proc *mtf = mtf_; struct mtf_file *iter; - for (iter = mtf_head; iter; ) + for (iter = mtf->head; iter; ) { if (iter->handle) { - assert (iter->input == NULL); - iter->input = xmalloc (sizeof *iter->input * iter->dict->nval); - - if (!sfm_read_case (iter->handle, iter->input, iter->dict)) - mtf_delete_file_in_place (&iter); + if (!sfm_read_case (iter->handle, &iter->input, iter->dict)) + mtf_delete_file_in_place (mtf, &iter); else iter = iter->next; } else - { - iter->input = temp_case->data; - iter = iter->next; - } + iter = iter->next; } } /* Compare the BY variables for files A and B; return -1 if A < B, 0 if A == B, 1 if A > B. */ static inline int -mtf_compare_BY_values (struct mtf_file *a, struct mtf_file *b) +mtf_compare_BY_values (struct mtf_proc *mtf, + struct mtf_file *a, struct mtf_file *b, + struct ccase *c) { + struct ccase *a_input, *b_input; int i; - - for (i = 0; i < mtf_n_by; i++) + + assert ((a == NULL) + (b == NULL) + (c == NULL) <= 1); + a_input = case_is_null (&a->input) ? c : &a->input; + b_input = case_is_null (&b->input) ? c : &b->input; + for (i = 0; i < mtf->by_cnt; i++) { assert (a->by[i]->type == b->by[i]->type); assert (a->by[i]->width == b->by[i]->width); if (a->by[i]->type == NUMERIC) { - double af = a->input[a->by[i]->fv].f; - double bf = b->input[b->by[i]->fv].f; + double af = case_num (a_input, a->by[i]->fv); + double bf = case_num (b_input, b->by[i]->fv); if (af < bf) return -1; @@ -1096,8 +1020,8 @@ mtf_compare_BY_values (struct mtf_file *a, struct mtf_file *b) int result; assert (a->by[i]->type == ALPHA); - result = memcmp (a->input[a->by[i]->fv].s, - b->input[b->by[i]->fv].s, + result = memcmp (case_str (a_input, a->by[i]->fv), + case_str (b_input, b->by[i]->fv), a->by[i]->width); if (result < 0) return -1; @@ -1108,22 +1032,14 @@ mtf_compare_BY_values (struct mtf_file *a, struct mtf_file *b) return 0; } -/* Used to determine whether we've already initialized this - variable. */ -static int mtf_seq_no = 0; - /* Perform one iteration of steps 3...7 above. */ static int -mtf_processing (struct ccase *c unused) +mtf_processing (struct ccase *c, void *mtf_ UNUSED) { - /* List of files with minimum BY values. */ - struct mtf_file *min_head, *min_tail; - - /* List of files with non-minimum BY values. */ - struct mtf_file *max_head, *max_tail; - - /* Iterator. */ - struct mtf_file *iter; + struct mtf_proc *mtf = mtf_; + struct mtf_file *min_head, *min_tail; /* Files with minimum BY values. */ + struct mtf_file *max_head, *max_tail; /* Files with non-minimum BY values. */ + struct mtf_file *iter; /* Iterator. */ for (;;) { @@ -1131,7 +1047,7 @@ mtf_processing (struct ccase *c unused) return because that would cause a record to be skipped. */ int advance = 1; - if (mtf_head->type == MTF_TABLE) + if (mtf->head->type == MTF_TABLE) return 0; /* 3. Find the FILE input record with minimum BY values. Store @@ -1140,11 +1056,11 @@ mtf_processing (struct ccase *c unused) 4. Find all the FILE input records with BY values identical to the minimums. Store all the values from these input records into the output record. */ - min_head = min_tail = mtf_head; + min_head = min_tail = mtf->head; max_head = max_tail = NULL; - for (iter = mtf_head->next; iter && iter->type == MTF_FILE; + for (iter = mtf->head->next; iter && iter->type == MTF_FILE; iter = iter->next) - switch (mtf_compare_BY_values (min_head, iter)) + switch (mtf_compare_BY_values (mtf, min_head, iter, c)) { case -1: if (max_head) @@ -1189,7 +1105,7 @@ mtf_processing (struct ccase *c unused) advance = 0; again: - switch (mtf_compare_BY_values (min_head, iter)) + switch (mtf_compare_BY_values (mtf, min_head, iter, c)) { case -1: if (max_head) @@ -1205,9 +1121,9 @@ mtf_processing (struct ccase *c unused) case 1: if (iter->handle == NULL) return 1; - if (sfm_read_case (iter->handle, iter->input, iter->dict)) + if (sfm_read_case (iter->handle, &iter->input, iter->dict)) goto again; - mtf_delete_file_in_place (&iter); + mtf_delete_file_in_place (mtf, &iter); break; default: @@ -1218,7 +1134,7 @@ mtf_processing (struct ccase *c unused) } /* Next sequence number. */ - mtf_seq_no++; + mtf->seq_num++; /* Store data to all the records we are using. */ if (min_tail) @@ -1227,29 +1143,24 @@ mtf_processing (struct ccase *c unused) { int i; - for (i = 0; i < iter->dict->nvar; i++) + for (i = 0; i < dict_get_var_cnt (iter->dict); i++) { - struct variable *v = iter->dict->var[i]; + struct variable *v = dict_get_var (iter->dict, i); + struct ccase *record; + union value *out; - if (v->p.mtf.master->foo == mtf_seq_no) + if (mtf->seq_nums[v->p.mtf.master->index] == mtf->seq_num) continue; - v->p.mtf.master->foo = mtf_seq_no; - -#if 0 - printf ("%s/%s: dest-fv=%d, src-fv=%d\n", - fh_handle_name (iter->handle), - v->name, - v->p.mtf.master->fv, v->fv); -#endif + mtf->seq_nums[v->p.mtf.master->index] = mtf->seq_num; + + record = case_is_null (&iter->input) ? c : &iter->input; + + assert (v->type == NUMERIC || v->type == ALPHA); + out = case_data_rw (mtf->mtf_case, v->p.mtf.master->fv); if (v->type == NUMERIC) - compaction_case->data[v->p.mtf.master->fv].f - = iter->input[v->fv].f; + out->f = case_num (record, v->fv); else - { - assert (v->type == ALPHA); - memcpy (compaction_case->data[v->p.mtf.master->fv].s, - iter->input[v->fv].s, v->width); - } + memcpy (out->s, case_str (record, v->fv), v->width); } } @@ -1260,25 +1171,20 @@ mtf_processing (struct ccase *c unused) { int i; - for (i = 0; i < iter->dict->nvar; i++) + for (i = 0; i < dict_get_var_cnt (iter->dict); i++) { - struct variable *v = iter->dict->var[i]; + struct variable *v = dict_get_var (iter->dict, i); + union value *out; - if (v->p.mtf.master->foo == mtf_seq_no) + if (mtf->seq_nums[v->p.mtf.master->index] == mtf->seq_num) continue; - v->p.mtf.master->foo = mtf_seq_no; - -#if 0 - printf ("%s/%s: dest-fv=%d\n", - fh_handle_name (iter->handle), - v->name, - v->p.mtf.master->fv); -#endif + mtf->seq_nums[v->p.mtf.master->index] = mtf->seq_num; + + out = case_data_rw (mtf->mtf_case, v->p.mtf.master->fv); if (v->type == NUMERIC) - compaction_case->data[v->p.mtf.master->fv].f = SYSMIS; + out->f = SYSMIS; else - memset (compaction_case->data[v->p.mtf.master->fv].s, ' ', - v->width); + memset (out->s, ' ', v->width); } if (iter->handle == NULL) @@ -1286,7 +1192,7 @@ mtf_processing (struct ccase *c unused) } /* 6. Write the output record. */ - process_active_file_output_case (); + mtf->sink->class->write (mtf->sink, mtf->mtf_case); /* 7. Read another record from each input file FILE and TABLE that we stored values from above. If we come to the end of @@ -1298,10 +1204,8 @@ mtf_processing (struct ccase *c unused) if (iter->handle) { - assert (iter->input != NULL); - - if (!sfm_read_case (iter->handle, iter->input, iter->dict)) - mtf_delete_file_in_place (&iter); + if (!sfm_read_case (iter->handle, &iter->input, iter->dict)) + mtf_delete_file_in_place (mtf, &iter); } iter = next; @@ -1311,66 +1215,74 @@ mtf_processing (struct ccase *c unused) break; } - return (mtf_head && mtf_head->type != MTF_TABLE); + return (mtf->head && mtf->head->type != MTF_TABLE); } /* Merge the dictionary for file F into the master dictionary - mtf_master. */ + mtf_dict. */ static int -mtf_merge_dictionary (struct mtf_file *f) +mtf_merge_dictionary (struct dictionary *const m, struct mtf_file *f) { - struct dictionary *const m = mtf_master; struct dictionary *d = f->dict; - - if (d->label && m->label == NULL) - m->label = xstrdup (d->label); + const char *d_docs, *m_docs; + + if (dict_get_label (m) == NULL) + dict_set_label (m, dict_get_label (d)); - if (d->documents) + d_docs = dict_get_documents (d); + m_docs = dict_get_documents (m); + if (d_docs != NULL) { - m->documents = xrealloc (m->documents, - 80 * (m->n_documents + d->n_documents)); - memcpy (&m->documents[80 * m->n_documents], - d->documents, 80 * d->n_documents); - m->n_documents += d->n_documents; + if (m_docs == NULL) + dict_set_documents (m, d_docs); + else + { + char *new_docs; + size_t new_len; + + new_len = strlen (m_docs) + strlen (d_docs); + new_docs = xmalloc (new_len + 1); + strcpy (new_docs, m_docs); + strcat (new_docs, d_docs); + dict_set_documents (m, new_docs); + free (new_docs); + } } - + + dict_compact_values (d); + { int i; - d->nval = 0; - for (i = 0; i < d->nvar; i++) + for (i = 0; i < dict_get_var_cnt (d); i++) { - struct variable *dv = d->var[i]; - struct variable *mv = find_dict_variable (m, dv->name); + struct variable *dv = dict_get_var (d, i); + struct variable *mv = dict_lookup_var (m, dv->name); - dv->fv = d->nval; - d->nval += dv->nv; - assert (dv->type == ALPHA || dv->width == 0); assert (!mv || mv->type == ALPHA || mv->width == 0); if (mv && dv->width == mv->width) { - if (dv->val_lab && !mv->val_lab) - mv->val_lab = copy_value_labels (dv->val_lab); - if (dv->miss_type != MISSING_NONE && mv->miss_type == MISSING_NONE) + if (val_labs_count (dv->val_labs) + && !val_labs_count (mv->val_labs)) + mv->val_labs = val_labs_copy (dv->val_labs); + if (dv->miss_type != MISSING_NONE + && mv->miss_type == MISSING_NONE) copy_missing_values (mv, dv); } if (mv && dv->label && !mv->label) mv->label = xstrdup (dv->label); - if (!mv) - { - mv = force_dup_variable (m, dv, dv->name); - - /* Used to make sure we initialize each variable in the - master dictionary exactly once per case. */ - mv->foo = mtf_seq_no; - } + if (!mv) + { + mv = dict_clone_var (m, dv, dv->name); + assert (mv != NULL); + } else if (mv->width != dv->width) { msg (SE, _("Variable %s in file %s (%s) has different " "type or width from the same variable in " "earlier file (%s)."), - dv->name, fh_handle_name (f->handle), + dv->name, handle_get_name (f->handle), var_type_description (dv), var_type_description (mv)); return 0; } @@ -1389,14 +1301,10 @@ cmd_import (void) { struct file_handle *handle = NULL; struct dictionary *dict; + struct get_pgm *pgm; int options = GTSV_OPT_NONE; int type; - int i; - int nval; - - lex_match_id ("IMPORT"); - for (;;) { lex_match ('/'); @@ -1437,46 +1345,21 @@ cmd_import (void) if (dict == NULL) return CMD_FAILURE; -#if DEBUGGING - dump_dict_variables (dict); -#endif if (0 == trim_dictionary (dict, &options)) { fh_close_handle (handle); return CMD_FAILURE; } -#if DEBUGGING - dump_dict_variables (dict); -#endif - - /* Set the fv and lv elements of all variables remaining in the - dictionary. */ - nval = 0; - for (i = 0; i < dict->nvar; i++) - { - struct variable *v = dict->var[i]; - - v->fv = nval; - nval += v->nv; - } - dict->nval = nval; - assert (nval); -#if DEBUGGING - printf (_("IMPORT translation table from file to memory:\n")); - for (i = 0; i < dict->nvar; i++) - { - struct variable *v = dict->var[i]; + dict_compact_values (dict); - printf (_(" %8s from %3d,%3d to %3d,%3d\n"), v->name, - v->get.fv, v->get.nv, v->fv, v->nv); - } -#endif + dict_destroy (default_dict); + default_dict = dict; - restore_dictionary (dict); - - vfm_source = &import_source; - get_file = handle; + pgm = xmalloc (sizeof *pgm); + pgm->handle = handle; + pgm->case_size = dict_get_case_size (default_dict); + vfm_source = create_case_source (&import_source_class, default_dict, pgm); return CMD_SUCCESS; } @@ -1484,26 +1367,26 @@ cmd_import (void) /* Reads all the cases from the data file and passes them to write_case(). */ static void -import_source_read (void) +import_source_read (struct case_source *source, + struct ccase *c, + write_case_func *write_case, write_case_data wc_data) { - while (pfm_read_case (get_file, temp_case->data, &default_dict) - && write_case ()) - ; - get_source_destroy_source (); + struct get_pgm *pgm = source->aux; + + while (pfm_read_case (pgm->handle, c, default_dict)) + if (!write_case (wc_data)) + break; } -struct case_stream import_source = +const struct case_source_class import_source_class = { + "IMPORT", NULL, import_source_read, - NULL, - NULL, - get_source_destroy_source, - NULL, - "IMPORT", + get_source_destroy, }; -static int export_write_case_func (struct ccase *c); +static int export_write_case_func (struct ccase *c, void *); /* Parses the EXPORT command. */ /* FIXME: same as cmd_save_internal(). */ @@ -1518,8 +1401,6 @@ cmd_export (void) int i; - lex_match_id ("EXPORT"); - lex_match ('/'); if (lex_match_id ("OUTFILE")) lex_match ('='); @@ -1528,67 +1409,59 @@ cmd_export (void) if (handle == NULL) return CMD_FAILURE; - dict = save_dictionary (); -#if DEBUGGING - dump_dict_variables (dict); -#endif - for (i = 0; i < dict->nvar; i++) - dict->var[i]->foo = i; + dict = dict_clone (default_dict); + for (i = 0; i < dict_get_var_cnt (dict); i++) + dict_get_var (dict, i)->aux = dict_get_var (default_dict, i); if (0 == trim_dictionary (dict, &options)) { fh_close_handle (handle); return CMD_FAILURE; } -#if DEBUGGING - dump_dict_variables (dict); -#endif - /* Write dictionary. */ if (!pfm_write_dictionary (handle, dict)) { - free_dictionary (dict); + dict_destroy (dict); fh_close_handle (handle); return CMD_FAILURE; } /* Fill in transformation structure. */ - t = trns = xmalloc (sizeof *t); + t = xmalloc (sizeof *t); t->h.proc = save_trns_proc; t->h.free = save_trns_free; t->f = handle; - t->nvar = dict->nvar; - t->var = xmalloc (sizeof *t->var * dict->nvar); - for (i = 0; i < dict->nvar; i++) - t->var[i] = dict->var[i]->foo; - t->case_buf = xmalloc (sizeof *t->case_buf * dict->nvar); - free_dictionary (dict); + t->nvar = dict_get_var_cnt (dict); + t->var = xmalloc (sizeof *t->var * t->nvar); + for (i = 0; i < t->nvar; i++) + t->var[i] = dict_get_var (dict, i)->aux; + t->case_buf = xmalloc (sizeof *t->case_buf * t->nvar); + dict_destroy (dict); - procedure (NULL, export_write_case_func, NULL); - save_trns_free ((struct trns_header *) t); + procedure (export_write_case_func, t); + save_trns_free (&t->h); return CMD_SUCCESS; } +/* Writes case C to the EXPORT file. */ static int -export_write_case_func (struct ccase *c) +export_write_case_func (struct ccase *c, void *aux) { - union value *p = (union value *) trns->case_buf; + struct save_trns *t = aux; + union value *p = (union value *) t->case_buf; int i; - for (i = 0; i < trns->nvar; i++) + for (i = 0; i < t->nvar; i++) { - struct variable *v = default_dict.var[trns->var[i]]; + struct variable *v = t->var[i]; if (v->type == NUMERIC) - *p++ = c->data[v->fv]; + (*p++).f = case_num (c, v->fv); else - (*p++).c = c->data[v->fv].s; + (*p++).c = (char *) case_str (c, v->fv); } - printf ("."); - fflush (stdout); - - pfm_write_case (trns->f, (union value *) trns->case_buf); + pfm_write_case (t->f, (union value *) t->case_buf); return 1; }