From f9d47b5bba8416419cf3bcd3aa23c2d40a05fcac Mon Sep 17 00:00:00 2001 From: Ben Pfaff Date: Tue, 2 Mar 2004 20:00:33 +0000 Subject: [PATCH] Beginning of VFM cleanup. --- NEWS | 7 +- TODO | 11 +- configure.ac | 2 +- src/ChangeLog | 172 ++++ src/Makefile.am | 4 +- src/aggregate.c | 72 +- src/algorithm.c | 329 ++++++- src/algorithm.h | 76 +- src/command.c | 3 +- src/data-list.c | 1329 +++++++++++++------------- src/dfm.c | 6 +- src/file-handle.h | 2 +- src/file-handle.q | 2 +- src/file-type.c | 357 +++---- src/flip.c | 534 +++++------ src/format.c | 10 + src/format.h | 1 + src/frequencies.q | 1 + src/get.c | 137 +-- src/glob.c | 3 - src/heap.c | 269 ------ src/heap.h | 54 -- src/inpt-pgm.c | 94 +- src/levene.c | 1 + src/matrix-data.c | 763 +++++++-------- src/set.q | 15 +- src/settings.h | 5 + src/sort.c | 2049 ++++++++++++++++++++-------------------- src/sort.h | 31 +- src/var.h | 21 +- src/vars-atr.c | 6 +- src/vfm.c | 468 +++++---- src/vfm.h | 109 ++- src/vfmP.h | 19 - tests/command/print.sh | 2 +- tests/command/sort.sh | 5 +- 36 files changed, 3630 insertions(+), 3339 deletions(-) delete mode 100644 src/heap.c delete mode 100644 src/heap.h diff --git a/NEWS b/NEWS index 976105c0..3d345d30 100644 --- a/NEWS +++ b/NEWS @@ -1,5 +1,5 @@ PSPP NEWS -- history of user-visible changes. -Time-stamp: <2003-12-09 11:33:29 blp> +Time-stamp: <2004-02-29 21:38:25 blp> Copyright (C) 1996-9, 2000 Free Software Foundation, Inc. See the end for copying conditions. @@ -9,6 +9,11 @@ Version 0.3.1 changes since 0.3.0: Update build system to Autoconf 2.58, Automake 1.7, gettext 0.12.1. + T-TEST is now implemented. + + Much of the code has been rewritten and refactored. It is now much + cleaner. + Version 0.3.0 changes since 0.2.3: Bugs fixed: diff --git a/TODO b/TODO index fa97f215..9eba3496 100644 --- a/TODO +++ b/TODO @@ -1,20 +1,15 @@ -Time-stamp: <2004-02-03 18:31:06 blp> +Time-stamp: <2004-02-24 19:22:15 blp> TODO ---- -random.c should not know about set_seed. +Use posix_fadvise(POSIX_FADV_SEQUENTIAL) where available. -Probably should get rid of approx.h. The user really needs to be responsible -for his own precision. +random.c should not know about set_seed. Use AFM files instead of Groff font files, and include AFMs for our default fonts with the distribution. -The way that data-in.c and data-out.c deal with strings is wrong. Instead of -the way it's done now, we should make it dynamically allocate a buffer and -return a pointer to it. This is a much safer interface. - Add libplot output driver. Suggested by Robert S. Maier : "it produces output in idraw-editable PS format, PCL5 format, xfig-editable format, Illustrator format,..., and can draw vector diff --git a/configure.ac b/configure.ac index d62b2bf4..379585a9 100644 --- a/configure.ac +++ b/configure.ac @@ -88,7 +88,7 @@ AC_REPLACE_FUNCS([memmove memset stpcpy strpbrk strerror strtol strtoul \ memchr getline getdelim strcasecmp strncasecmp memmem \ strtok_r]) AC_CHECK_FUNCS([gethostname strstr strtod __setfpucw isinf isnan finite \ - getpid feholdexcept]) + getpid feholdexcept mkdtemp]) AC_PROG_LN_S diff --git a/src/ChangeLog b/src/ChangeLog index 8b4b8d04..eb405c35 100644 --- a/src/ChangeLog +++ b/src/ChangeLog @@ -1,3 +1,175 @@ +Tue Mar 2 11:36:52 2004 Ben Pfaff + + * frequencies.q: (cleanup_freq_tab) Avoid memory leak by + destroying hash table. + + * glob.c: (read_active_file) Variable not referenced, removed. + (cancel_input_pgm) Ditto. + + * levene.c: Add #include needed to call free(). + + * aggregate.c: (parse_aggregate_functions) Make `function' + variable const. + +Tue Mar 2 11:30:56 2004 Ben Pfaff + + Start working to eliminate VFM dependence on static variables. + + * command.c: (cmd_parse) Use case_source_is_class(). + + * data-list.c: Rewrite to eliminate use of static variables. + + * dfm.c: (cmd_begin_data) Use case_source_is_class(). + + * file-handle.q: (fh_handle_name) Make parameter const. + + * file-type.c: Rewrite to eliminate use of static variables. + + * flip.c: Rewrite to eliminate use of static variables. + + * format.c: (get_format_var_width) New function. + + * get.c: Eliminate use of static variables. + + * inpt-pgm.c: Eliminate use of static variables. + + * matrix-data.c: Eliminate use of static variables. + + * set.q: (set_max_workspace) New variable. + (cmd_set) Use SET WORKSPACE to modify set_max_workspace. + + * var.h: (struct case_list) Move here from vfmP.h. + + * vars-atr.c: (discard_variables) Handle new vfm_source type. + + * vfm.c: (vfm_source) Change type from struct case_stream to + struct case_source. + (vfm_sink) Change type from struct case_stream to struct + case_sink. + (static var paging) Rename workspace_overflow, all references + updated. + (procedure) Use new class structures. + (process_active_file) Ditto. + (process_active_file_write_case) Ditto. + (prepare_for_writing) Use set_max_workspace. Use new class + structures. + (close_active_file) Use new class structures. Free old sink. + (global var disk_source_file) Removed. + (global var disk_sink_file) Removed. + (disk_stream_init) Removed. + (disk_stream_read) Removed. + (disk_stream_write) Removed. + (disk_stream_mode) Removed. + (disk_stream_destroy_source) Removed. + (disk_stream_destroy_sink) Removed. + (global var vfm_disk_stream) Removed. + (disk_sink_create) New function. + (disk_sink_write) New function. + (disk_sink_destroy) New function. + (disk_sink_make_source) New function. + (disk_sink_class) New static var. + (disk_source_read) New function. + (disk_source_destroy) New function. + (global var vfm_source_class) New var. + (global var memory_source_cases) Removed. + (global var memory_sink_cases) Removed. + (global var memory_sink_max_cases) Removed. + (struct memory_sink_info) New struct. + (memory_stream_init) Removed. + (memory_stream_read) Removed. + (memory_stream_write) Removed. + (memory_stream_mode) Removed. + (memory_stream_destroy_source) Removed. + (memory_stream_destroy_sink) Removed. + (global var vfm_memory_stream) Removed. + (page_to_disk) Renamed write_active_file_to_disk(). + (memory_sink_create) New function. + (memory_sink_write) New function. + (memory_sink_destroy) New function. + (memory_sink_make_source) New function. + (memory_sink_class) New static var. + (memory_source_read) New function. + (memory_source_destroy) New function. + (memory_source_get_cases) New function. + (memory_source_set_cases) New function. + (global var vfm_source_class) New var. + (procedure_write_case) Use new class structures. + (create_case_source) New function. + (case_source_is_complex) New function. + (case_source_is_class) New function. + (create_case_sink) New function. + + * vfm.h: (global variable reinit_sysmis) Not used, removed. + (global variable reinit_blanks) Not used, removed. + (global variable init_zero) Not used, removed. + (global variable init_blanks) Not used, removed. + (struct case_source) New struct. + (struct case_source_class) New struct. + (struct case_sink) New struct. + (struct case_sink_class) New struct. + (struct case_stream) Removed. + + * vfmP.h: (struct case_list) Moved to var.h. + +Tue Mar 2 11:28:30 2004 Ben Pfaff + + * algorithm.c: (count_equal) New function. + (count_if) New function. + (unique) Add assertions. + (partition) Add assertions. + (is_partitioned) New function. + (copy_if) Add assertions. + (remove_equal) Add assertions. + (lexicographical_compare) Rename lexicographical_compare_3way. + (sort) Add assertions. Rephrase some code. + (is_sorted) New function. + +Sun Feb 29 23:24:57 2004 Ben Pfaff + + Rewrite SORT CASES. + + * sort.c: Completely rewrite. + + * sort.h: Expose interface via struct sort_cases_pgm, not via + global variables. + + * aggregate.c: (sort) New static var. + (cmd_aggregate) Use sort. + (create_sysfile) Ditto. + (aggregate_single_case) Ditto. + (dump_aggregate_info) Ditto. + (agr_00x_end_func) Ditto. + (debug_print) Ditto. + + * var.h: (enum SRT_ASCEND) Removed. + (enum SRT_DESCEND) Removed. + (struct sort_cases_proc) Removed. + (struct variable) Remove p.srt member. + +Sun Feb 29 23:22:45 2004 Ben Pfaff + + Get rid of the old, crappy heap structure and replace it by a new, + shiny, C++ STL-like heap structure. + + * Makefile.am: (pspp_SOURCES) Remove heap.c, heap.h. + + * algorithm.c: (push_heap) New function. + (heapify) Ditto. + (pop_heap) Ditto. + (make_heap) Ditto. + (sort_heap) Ditto. + (is_heap) Ditto. + + * heap.c: Removed. + + * heap.h: Removed. + +Sun Feb 29 23:21:53 2004 Ben Pfaff + + Increase warning level. + + * Makefile.am: (AM_CFLAGS) Remove -Wnouninitialized. + Sat Feb 21 17:38:58 WST 2004 John Darrington * main.c: Added a signal handler for SIGSEGV requesting a bug report. diff --git a/src/Makefile.am b/src/Makefile.am index 7b8a51ba..b9826373 100644 --- a/src/Makefile.am +++ b/src/Makefile.am @@ -12,7 +12,7 @@ AM_CPPFLAGS = -I$(top_srcdir) -I$(top_srcdir)/src -I$(top_srcdir)/lib \ -I$(top_srcdir)/intl if cc_is_gcc -AM_CFLAGS=-Wall -W -Wno-uninitialized -Wwrite-strings -Wstrict-prototypes \ +AM_CFLAGS=-Wall -W -Wwrite-strings -Wstrict-prototypes \ -Wpointer-arith -Wno-sign-compare -Wmissing-prototypes endif @@ -43,7 +43,7 @@ dictionary.c do-if.c do-ifP.h error.c error.h expr-evl.c expr-opt.c \ expr-prs.c expr.h exprP.h file-handle.h file-type.c \ filename.c filename.h flip.c font.h format.c format.def format.h \ formats.c get.c getline.c getline.h glob.c glob.h \ -groff-font.c hash.c hash.h heap.c heap.h html.c htmlP.h include.c \ +groff-font.c hash.c hash.h html.c htmlP.h include.c \ inpt-pgm.c lexer.c lexer.h levene.c levene.h \ log.h loop.c magic.c magic.h main.c \ main.h matrix-data.c matrix.c matrix.h mis-val.c misc.c misc.h \ diff --git a/src/aggregate.c b/src/aggregate.c index dbfad40f..0c274c4b 100644 --- a/src/aggregate.c +++ b/src/aggregate.c @@ -26,6 +26,7 @@ #include "file-handle.h" #include "lexer.h" #include "misc.h" +#include "pool.h" #include "settings.h" #include "sfm.h" #include "sort.h" @@ -76,7 +77,7 @@ struct agr_func }; /* Attributes of aggregation functions. */ -static struct agr_func agr_func_tab[] = +static const struct agr_func agr_func_tab[] = { {"", 0, -1, {0, 0, 0}}, {"SUM", 0, -1, {FMT_F, 8, 2}}, @@ -116,6 +117,9 @@ enum /* ITEMWISE or COLUMNWISE. */ static int missing; +/* Sort program. */ +static struct sort_cases_pgm *sort; + /* Aggregate variables. */ static struct agr_var *agr_first, *agr_next; @@ -157,15 +161,12 @@ static void debug_print (int flags); int cmd_aggregate (void) { - /* From sort.c. */ - int parse_sort_variables (void); - /* Have we seen these subcommands? */ unsigned seen = 0; outfile = NULL; missing = ITEMWISE; - v_sort = NULL; + sort = NULL; prev_case = NULL; agr_dict = dict_create (); @@ -183,7 +184,7 @@ cmd_aggregate (void) { if (seen & 1) { - free (v_sort); + destroy_sort_cases_pgm (sort); dict_destroy (agr_dict); msg (SE, _("%s subcommand given multiple times."),"OUTFILE"); return CMD_FAILURE; @@ -198,7 +199,7 @@ cmd_aggregate (void) outfile = fh_parse_file_handle (); if (outfile == NULL) { - free (v_sort); + destroy_sort_cases_pgm (sort); dict_destroy (agr_dict); return CMD_FAILURE; } @@ -209,7 +210,7 @@ cmd_aggregate (void) lex_match ('='); if (!lex_match_id ("COLUMNWISE")) { - free (v_sort); + destroy_sort_cases_pgm (sort); dict_destroy (agr_dict); lex_error (_("while expecting COLUMNWISE")); return CMD_FAILURE; @@ -224,7 +225,7 @@ cmd_aggregate (void) { if (seen & 8) { - free (v_sort); + destroy_sort_cases_pgm (sort); dict_destroy (agr_dict); msg (SE, _("%s subcommand given multiple times."),"BREAK"); return CMD_FAILURE; @@ -232,7 +233,8 @@ cmd_aggregate (void) seen |= 8; lex_match ('='); - if (!parse_sort_variables ()) + sort = parse_sort (); + if (sort == NULL) { dict_destroy (agr_dict); return CMD_FAILURE; @@ -241,11 +243,11 @@ cmd_aggregate (void) { int i; - for (i = 0; i < nv_sort; i++) + for (i = 0; i < sort->var_cnt; i++) { struct variable *v; - v = dict_clone_var (agr_dict, v_sort[i], v_sort[i]->name); + v = dict_clone_var (agr_dict, sort->vars[i], sort->vars[i]->name); assert (v != NULL); } } @@ -261,7 +263,7 @@ cmd_aggregate (void) if (!parse_aggregate_functions ()) { free_aggregate_functions (); - free (v_sort); + destroy_sort_cases_pgm (sort); return CMD_FAILURE; } @@ -312,7 +314,7 @@ cmd_aggregate (void) if (outfile != NULL) type |= 4; - if (nv_sort != 0 && (seen & 4) == 0) + if (sort != NULL && (seen & 4) == 0) type |= 2; if (temporary) type |= 1; @@ -323,7 +325,7 @@ cmd_aggregate (void) cancel_temporary (); /* fall through */ case 2: - sort_cases (0); + sort_cases (sort, 0); goto case0; case 1: @@ -367,11 +369,11 @@ cmd_aggregate (void) case 6: case 7: - sort_cases (1); + sort_cases (sort, 1); if (!create_sysfile ()) goto lossage; - read_sort_output (agr_11x_func, NULL); + read_sort_output (sort, agr_11x_func, NULL); { struct ccase *save_temp_case = temp_case; @@ -391,7 +393,7 @@ cmd_aggregate (void) free (buf_1xx); /* Clean up. */ - free (v_sort); + destroy_sort_cases_pgm (sort); free_aggregate_functions (); free (prev_case); @@ -399,7 +401,7 @@ cmd_aggregate (void) lossage: /* Clean up. */ - free (v_sort); + destroy_sort_cases_pgm (sort); free_aggregate_functions (); free (prev_case); @@ -418,7 +420,7 @@ create_sysfile (void) if (!sfm_write_dictionary (&w)) { free_aggregate_functions (); - free (v_sort); + destroy_sort_cases_pgm (sort); dict_destroy (agr_dict); return 0; } @@ -443,7 +445,7 @@ parse_aggregate_functions (void) int n_dest; int include_missing; - struct agr_func *function; + const struct agr_func *function; int func_index; union value arg[2]; @@ -781,8 +783,8 @@ aggregate_single_case (struct ccase *input, struct ccase *output) { int i; - for (i = 0; i < nv_sort; i++) - n_elem += v_sort[i]->nv; + for (i = 0; i < sort->var_cnt; i++) + n_elem += sort->vars[i]->nv; } prev_case = xmalloc (sizeof *prev_case * n_elem); @@ -792,9 +794,9 @@ aggregate_single_case (struct ccase *input, struct ccase *output) union value *iter = prev_case; int i; - for (i = 0; i < nv_sort; i++) + for (i = 0; i < sort->var_cnt; i++) { - struct variable *v = v_sort[i]; + struct variable *v = sort->vars[i]; if (v->type == NUMERIC) (iter++)->f = input->data[v->fv].f; @@ -817,9 +819,9 @@ aggregate_single_case (struct ccase *input, struct ccase *output) union value *iter = prev_case; int i; - for (i = 0; i < nv_sort; i++) + for (i = 0; i < sort->var_cnt; i++) { - struct variable *v = v_sort[i]; + struct variable *v = sort->vars[i]; switch (v->type) { @@ -856,9 +858,9 @@ not_equal: union value *iter = prev_case; int i; - for (i = 0; i < nv_sort; i++) + for (i = 0; i < sort->var_cnt; i++) { - struct variable *v = v_sort[i]; + struct variable *v = sort->vars[i]; if (v->type == NUMERIC) (iter++)->f = input->data[v->fv].f; @@ -1050,8 +1052,8 @@ dump_aggregate_info (struct ccase *output) { int i; - for (i = 0; i < nv_sort; i++) - n_elem += v_sort[i]->nv; + for (i = 0; i < sort->var_cnt; i++) + n_elem += sort->vars[i]->nv; } debug_printf (("n_elem=%d:", n_elem)); memcpy (output->data, prev_case, sizeof (union value) * n_elem); @@ -1212,7 +1214,7 @@ agr_00x_end_func (void *aux UNUSED) active file. */ dump_aggregate_info (compaction_case); vfm_sink_info.ncases++; - vfm_sink->write (); + vfm_sink->class->write (vfm_sink, temp_case); } /* Transform the aggregate case buf_1xx, in internal format, to system @@ -1325,9 +1327,9 @@ debug_print (int flags) int i; printf (" /BREAK="); - for (i = 0; i < nv_sort; i++) - printf ("%s(%c) ", v_sort[i]->name, - v_sort[i]->p.srt.order == SRT_ASCEND ? 'A' : 'D'); + for (i = 0; i < sort->var_cnt; i++) + printf ("%s(%c) ", sort->vars[i]->name, + sort->vars[i]->p.srt.order == SRT_ASCEND ? 'A' : 'D'); putc ('\n', stdout); } diff --git a/src/algorithm.c b/src/algorithm.c index 0982aeb7..aaa2b846 100644 --- a/src/algorithm.c +++ b/src/algorithm.c @@ -91,21 +91,28 @@ #include #include "algorithm.h" -#include #include #include #include #include "alloc.h" #include "random.h" + +/* Some of the assertions in this file are very expensive. If + we're optimizing, don't include them. */ +#if __OPTIMIZE__ +#define NDEBUG +#endif +#include /* Finds an element in ARRAY, which contains COUNT elements of SIZE bytes each, using COMPARE for comparisons. Returns the first element in ARRAY that matches TARGET, or a null pointer on failure. AUX is passed to each comparison as auxiliary data. */ -void *find (const void *array, size_t count, size_t size, - const void *target, - algo_compare_func *compare, void *aux) +void * +find (const void *array, size_t count, size_t size, + const void *target, + algo_compare_func *compare, void *aux) { const unsigned char *element = array; @@ -119,6 +126,51 @@ void *find (const void *array, size_t count, size_t size, return NULL; } + +/* Counts and return the number of elements in ARRAY, which + contains COUNT elements of SIZE bytes each, which are equal to + ELEMENT as compared with COMPARE. AUX is passed as auxiliary + data to COMPARE. */ +size_t +count_equal (const void *array, size_t count, size_t size, + const void *element, + algo_compare_func *compare, void *aux) +{ + const unsigned char *first = array; + size_t equal_cnt = 0; + + while (count-- > 0) + { + if (compare (element, first, aux) == 0) + equal_cnt++; + + first += size; + } + + return equal_cnt; +} + +/* Counts and return the number of elements in ARRAY, which + contains COUNT elements of SIZE bytes each, for which + PREDICATE returns nonzero. AUX is passed as auxiliary data to + PREDICATE. */ +size_t +count_if (const void *array, size_t count, size_t size, + algo_predicate_func *predicate, void *aux) +{ + const unsigned char *first = array; + size_t nonzero_cnt = 0; + + while (count-- > 0) + { + if (predicate (first, aux) != 0) + nonzero_cnt++; + + first += size; + } + + return nonzero_cnt; +} /* Byte-wise swap two items of size SIZE. */ #define SWAP(a, b, size) \ @@ -148,8 +200,11 @@ unique (void *array, size_t count, size_t size, for (;;) { first += size; - if (first >= last) - return count; + if (first >= last) + { + assert (adjacent_find_equal (array, count, size, compare, aux) == NULL); + return count; + } if (compare (result, first, aux)) { @@ -181,8 +236,9 @@ size_t partition (void *array, size_t count, size_t size, algo_predicate_func *predicate, void *aux) { + size_t nonzero_cnt = count; char *first = array; - char *last = first + count * size; + char *last = first + nonzero_cnt * size; for (;;) { @@ -191,13 +247,13 @@ partition (void *array, size_t count, size_t size, for (;;) { if (first == last) - return count; + goto done; else if (!predicate (first, aux)) break; first += size; } - count--; + nonzero_cnt--; /* Move LAST backward to point to last element that passes PREDICATE. */ @@ -206,11 +262,11 @@ partition (void *array, size_t count, size_t size, last -= size; if (first == last) - return count; + goto done; else if (predicate (last, aux)) break; else - count--; + nonzero_cnt--; } /* By swapping FIRST and LAST we extend the starting and @@ -219,6 +275,32 @@ partition (void *array, size_t count, size_t size, SWAP (first, last, size); first += size; } + + done: + assert (is_partitioned (array, count, size, nonzero_cnt, predicate, aux)); + return nonzero_cnt; +} + +/* Checks whether ARRAY, which contains COUNT elements of SIZE + bytes each, is partitioned such that PREDICATE returns nonzero + for the first NONZERO_CNT elements and zero for the remaining + elements. AUX is passed as auxiliary data to PREDICATE. */ +int +is_partitioned (const void *array, size_t count, size_t size, + size_t nonzero_cnt, + algo_predicate_func *predicate, void *aux) +{ + const unsigned char *first = array; + size_t idx; + + assert (nonzero_cnt <= count); + for (idx = 0; idx < nonzero_cnt; idx++) + if (predicate (first + idx * size, aux) == 0) + return 0; + for (idx = nonzero_cnt; idx < count; idx++) + if (predicate (first + idx * size, aux) != 0) + return 0; + return 1; } /* A algo_random_func that uses random.h. */ @@ -258,6 +340,7 @@ copy_if (const void *array, size_t count, size_t size, const unsigned char *input = array; const unsigned char *last = input + size * count; unsigned char *output = result; + size_t nonzero_cnt = 0; while (input < last) { @@ -265,14 +348,16 @@ copy_if (const void *array, size_t count, size_t size, { memcpy (output, input, size); output += size; + nonzero_cnt++; } - else - count--; input += size; } - return count; + assert (nonzero_cnt == count_if (array, count, size, predicate, aux)); + assert (nonzero_cnt == count_if (result, nonzero_cnt, size, predicate, aux)); + + return nonzero_cnt; } /* A predicate and its auxiliary data. */ @@ -306,7 +391,7 @@ remove_equal (void *array, size_t count, size_t size, for (;;) { if (first >= last) - return count; + goto done; if (compare (first, element, aux) == 0) break; @@ -319,7 +404,7 @@ remove_equal (void *array, size_t count, size_t size, { first += size; if (first >= last) - return count; + goto done; if (compare (first, element, aux) == 0) { @@ -331,6 +416,8 @@ remove_equal (void *array, size_t count, size_t size, result += size; } + done: + assert (count_equal (array, count, size, element, compare, aux) == 0); return count; } @@ -383,6 +470,8 @@ binary_search (const void *array, size_t count, size_t size, return (void *) element; } } + + assert (find (array, count, size, value, compare, aux) == NULL); return NULL; } @@ -392,10 +481,10 @@ binary_search (const void *array, size_t count, size_t size, strcmp()-type result. AUX is passed to COMPARE as auxiliary data. */ int -lexicographical_compare (const void *array1, size_t count1, - const void *array2, size_t count2, - size_t size, - algo_compare_func *compare, void *aux) +lexicographical_compare_3way (const void *array1, size_t count1, + const void *array2, size_t count2, + size_t size, + algo_compare_func *compare, void *aux) { const unsigned char *first1 = array1; const unsigned char *first2 = array2; @@ -471,21 +560,20 @@ typedef struct stack size is needed (actually O(1) in this case)! */ void -sort (void *const pbase, size_t total_elems, size_t size, - algo_compare_func *cmp, void *aux) +sort (void *array, size_t count, size_t size, + algo_compare_func *compare, void *aux) { - register char *base_ptr = (char *) pbase; - + char *const first = array; const size_t max_thresh = MAX_THRESH * size; - if (total_elems == 0) + if (count == 0) /* Avoid lossage with unsigned arithmetic below. */ return; - if (total_elems > MAX_THRESH) + if (count > MAX_THRESH) { - char *lo = base_ptr; - char *hi = &lo[size * (total_elems - 1)]; + char *lo = first; + char *hi = &lo[size * (count - 1)]; stack_node stack[STACK_SIZE]; stack_node *top = stack + 1; @@ -502,13 +590,13 @@ sort (void *const pbase, size_t total_elems, size_t size, char *mid = lo + size * ((hi - lo) / size >> 1); - if ((*cmp) ((void *) mid, (void *) lo, aux) < 0) + if (compare (mid, lo, aux) < 0) SWAP (mid, lo, size); - if ((*cmp) ((void *) hi, (void *) mid, aux) < 0) + if (compare (hi, mid, aux) < 0) SWAP (mid, hi, size); else goto jump_over; - if ((*cmp) ((void *) mid, (void *) lo, aux) < 0) + if (compare (mid, lo, aux) < 0) SWAP (mid, lo, size); jump_over:; @@ -520,10 +608,10 @@ sort (void *const pbase, size_t total_elems, size_t size, that this algorithm runs much faster than others. */ do { - while ((*cmp) ((void *) left_ptr, (void *) mid, aux) < 0) + while (compare (left_ptr, mid, aux) < 0) left_ptr += size; - while ((*cmp) ((void *) mid, (void *) right_ptr, aux) < 0) + while (compare (mid, right_ptr, aux) < 0) right_ptr -= size; if (left_ptr < right_ptr) @@ -577,18 +665,18 @@ sort (void *const pbase, size_t total_elems, size_t size, } } - /* Once the BASE_PTR array is partially sorted by quicksort the rest + /* Once the FIRST array is partially sorted by quicksort the rest is completely sorted using insertion sort, since this is efficient - for partitions below MAX_THRESH size. BASE_PTR points to the beginning + for partitions below MAX_THRESH size. FIRST points to the beginning of the array to sort, and END_PTR points at the very last element in the array (*not* one beyond it!). */ #define min(x, y) ((x) < (y) ? (x) : (y)) { - char *const end_ptr = &base_ptr[size * (total_elems - 1)]; - char *tmp_ptr = base_ptr; - char *thresh = min(end_ptr, base_ptr + max_thresh); + char *const end_ptr = &first[size * (count - 1)]; + char *tmp_ptr = first; + char *thresh = min(end_ptr, first + max_thresh); register char *run_ptr; /* Find smallest element in first threshold and place it at the @@ -596,19 +684,19 @@ sort (void *const pbase, size_t total_elems, size_t size, and the operation speeds up insertion sort's inner loop. */ for (run_ptr = tmp_ptr + size; run_ptr <= thresh; run_ptr += size) - if ((*cmp) ((void *) run_ptr, (void *) tmp_ptr, aux) < 0) + if (compare (run_ptr, tmp_ptr, aux) < 0) tmp_ptr = run_ptr; - if (tmp_ptr != base_ptr) - SWAP (tmp_ptr, base_ptr, size); + if (tmp_ptr != first) + SWAP (tmp_ptr, first, size); /* Insertion sort, running from left-hand-side up to right-hand-side. */ - run_ptr = base_ptr + size; + run_ptr = first + size; while ((run_ptr += size) <= end_ptr) { tmp_ptr = run_ptr - size; - while ((*cmp) ((void *) run_ptr, (void *) tmp_ptr, aux) < 0) + while (compare (run_ptr, tmp_ptr, aux) < 0) tmp_ptr -= size; tmp_ptr += size; @@ -629,6 +717,25 @@ sort (void *const pbase, size_t total_elems, size_t size, } } } + + assert (is_sorted (array, count, size, compare, aux)); +} + +/* Tests whether ARRAY, which contains COUNT elements of SIZE + bytes each, is sorted in order according to COMPARE. AUX is + passed to COMPARE as auxiliary data. */ +int +is_sorted (const void *array, size_t count, size_t size, + algo_compare_func *compare, void *aux) +{ + const unsigned char *first = array; + size_t idx; + + for (idx = 0; idx + 1 < count; idx++) + if (compare (first + idx * size, first + (idx + 1) * size, aux) > 0) + return 0; + + return 1; } /* Computes the generalized set difference, ARRAY1 minus ARRAY2, @@ -704,4 +811,140 @@ adjacent_find_equal (const void *array, size_t count, size_t size, return NULL; } + +/* ARRAY contains COUNT elements of SIZE bytes each. Initially + the first COUNT - 1 elements of these form a heap, followed by + a single element not part of the heap. This function adds the + final element, forming a heap of COUNT elements in ARRAY. + Uses COMPARE to compare elements, passing AUX as auxiliary + data. */ +void +push_heap (void *array, size_t count, size_t size, + algo_compare_func *compare, void *aux) +{ + unsigned char *first = array; + size_t i; + + assert (count < 1 || is_heap (array, count - 1, size, compare, aux)); + for (i = count; i > 1; i /= 2) + { + unsigned char *parent = first + (i / 2 - 1) * size; + unsigned char *element = first + (i - 1) * size; + if (compare (parent, element, aux) < 0) + SWAP (parent, element, size); + else + break; + } + assert (is_heap (array, count, size, compare, aux)); +} + +/* ARRAY contains COUNT elements of SIZE bytes each. Initially + the children of ARRAY[idx - 1] are heaps, but ARRAY[idx - 1] + may be smaller than its children. This function fixes that, + so that ARRAY[idx - 1] itself is a heap. Uses COMPARE to + compare elements, passing AUX as auxiliary data. */ +static void +heapify (void *array, size_t count, size_t size, + size_t idx, + algo_compare_func *compare, void *aux) +{ + unsigned char *first = array; + + for (;;) + { + size_t left = 2 * idx; + size_t right = left + 1; + size_t largest = idx; + + if (left <= count + && compare (first + size * (left - 1), + first + size * (idx - 1), aux) > 0) + largest = left; + + if (right <= count + && compare (first + size * (right - 1), + first + size * (largest - 1), aux) > 0) + largest = right; + + if (largest == idx) + break; + + SWAP (first + size * (idx - 1), first + size * (largest - 1), size); + idx = largest; + } +} + +/* ARRAY contains COUNT elements of SIZE bytes each. Initially + all COUNT elements form a heap. This function moves the + largest element in the heap to the final position in ARRAY and + reforms a heap of the remaining COUNT - 1 elements at the + beginning of ARRAY. Uses COMPARE to compare elements, passing + AUX as auxiliary data. */ +void +pop_heap (void *array, size_t count, size_t size, + algo_compare_func *compare, void *aux) +{ + unsigned char *first = array; + + assert (is_heap (array, count, size, compare, aux)); + SWAP (first, first + (count - 1) * size, size); + heapify (first, count - 1, size, 1, compare, aux); + assert (count < 1 || is_heap (array, count - 1, size, compare, aux)); +} + +/* Turns ARRAY, which contains COUNT elements of SIZE bytes, into + a heap. Uses COMPARE to compare elements, passing AUX as + auxiliary data. */ +void +make_heap (void *array, size_t count, size_t size, + algo_compare_func *compare, void *aux) +{ + size_t idx; + + for (idx = count / 2; idx >= 1; idx--) + heapify (array, count, size, idx, compare, aux); + assert (count < 1 || is_heap (array, count, size, compare, aux)); +} + +/* ARRAY contains COUNT elements of SIZE bytes each. Initially + all COUNT elements form a heap. This function turns the heap + into a fully sorted array. Uses COMPARE to compare elements, + passing AUX as auxiliary data. */ +void +sort_heap (void *array, size_t count, size_t size, + algo_compare_func *compare, void *aux) +{ + unsigned char *first = array; + size_t idx; + + assert (is_heap (array, count, size, compare, aux)); + for (idx = count; idx >= 2; idx--) + { + SWAP (first, first + (idx - 1) * size, size); + heapify (array, idx - 1, size, 1, compare, aux); + } + assert (is_sorted (array, count, size, compare, aux)); +} + +/* ARRAY contains COUNT elements of SIZE bytes each. This + function tests whether ARRAY is a heap and returns 1 if so, 0 + otherwise. Uses COMPARE to compare elements, passing AUX as + auxiliary data. */ +int +is_heap (const void *array, size_t count, size_t size, + algo_compare_func *compare, void *aux) +{ + const unsigned char *first = array; + size_t child; + + for (child = 2; child <= count; child++) + { + size_t parent = child / 2; + if (compare (first + (parent - 1) * size, + first + (child - 1) * size, aux) < 0) + return 0; + } + + return 1; +} diff --git a/src/algorithm.h b/src/algorithm.h index 44113007..f8f2b84d 100644 --- a/src/algorithm.h +++ b/src/algorithm.h @@ -27,12 +27,33 @@ void *find (const void *array, size_t count, size_t size, const void *target, algo_compare_func *compare, void *aux); +/* Counts and return the number of elements in ARRAY, which + contains COUNT elements of SIZE bytes each, which are equal to + ELEMENT as compared with COMPARE. AUX is passed as auxiliary + data to COMPARE. */ +size_t count_equal (const void *array, size_t count, size_t size, + const void *element, + algo_compare_func *compare, void *aux); + +/* Counts and return the number of elements in ARRAY, which + contains COUNT elements of SIZE bytes each, for which + PREDICATE returns nonzero. AUX is passed as auxiliary data to + PREDICATE. */ +size_t count_if (const void *array, size_t count, size_t size, + algo_predicate_func *predicate, void *aux); + /* Sorts ARRAY, which contains COUNT elements of SIZE bytes each, using COMPARE for comparisons. AUX is passed to each comparison as auxiliary data. */ void sort (void *array, size_t count, size_t size, algo_compare_func *compare, void *aux); +/* Tests whether ARRAY, which contains COUNT elements of SIZE + bytes each, is sorted in order according to COMPARE. AUX is + passed to COMPARE as auxiliary data. */ +int is_sorted (const void *array, size_t count, size_t size, + algo_compare_func *compare, void *aux); + /* Makes the elements in ARRAY unique, by moving up duplicates, and returns the new number of elements in the array. Sorted arrays only. Arguments same as for sort() above. */ @@ -51,6 +72,14 @@ size_t sort_unique (void *array, size_t count, size_t size, size_t partition (void *array, size_t count, size_t size, algo_predicate_func *predicate, void *aux); +/* Checks whether ARRAY, which contains COUNT elements of SIZE + bytes each, is partitioned such that PREDICATE returns nonzero + for the first NONZERO_CNT elements and zero for the remaining + elements. AUX is passed as auxiliary data to PREDICATE. */ +int is_partitioned (const void *array, size_t count, size_t size, + size_t nonzero_cnt, + algo_predicate_func *predicate, void *aux); + /* Randomly reorders ARRAY, which contains COUNT elements of SIZE bytes each. Uses RANDOM as a source of random data, passing AUX as the auxiliary data. RANDOM may be null to use a @@ -95,10 +124,10 @@ void *binary_search (const void *array, size_t count, size_t size, elements of SIZE bytes, according to COMPARE. Returns a strcmp()-type result. AUX is passed to COMPARE as auxiliary data. */ -int lexicographical_compare (const void *array1, size_t count1, - const void *array2, size_t count2, - size_t size, - algo_compare_func *compare, void *aux); +int lexicographical_compare_3way (const void *array1, size_t count1, + const void *array2, size_t count2, + size_t size, + algo_compare_func *compare, void *aux); /* Computes the generalized set difference, ARRAY1 minus ARRAY2, into RESULT, and returns the number of elements written to @@ -122,4 +151,43 @@ size_t set_difference (const void *array1, size_t count1, void *adjacent_find_equal (const void *array, size_t count, size_t size, algo_compare_func *compare, void *aux); +/* ARRAY contains COUNT elements of SIZE bytes each. Initially + the first COUNT - 1 elements of these form a heap, followed by + a single element not part of the heap. This function adds the + final element, forming a heap of COUNT elements in ARRAY. + Uses COMPARE to compare elements, passing AUX as auxiliary + data. */ +void push_heap (void *array, size_t count, size_t size, + algo_compare_func *compare, void *aux); + +/* ARRAY contains COUNT elements of SIZE bytes each. Initially + all COUNT elements form a heap. This function moves the + largest element in the heap to the final position in ARRAY and + reforms a heap of the remaining COUNT - 1 elements at the + beginning of ARRAY. Uses COMPARE to compare elements, passing + AUX as auxiliary data. */ +void pop_heap (void *array, size_t count, size_t size, + algo_compare_func *compare, void *aux); + +/* Turns ARRAY, which contains COUNT elements of SIZE bytes, into + a heap. Uses COMPARE to compare elements, passing AUX as + auxiliary data. */ +void make_heap (void *array, size_t count, size_t size, + algo_compare_func *compare, void *aux); + +/* ARRAY contains COUNT elements of SIZE bytes each. Initially + all COUNT elements form a heap. This function turns the heap + into a fully sorted array. Uses COMPARE to compare elements, + passing AUX as auxiliary data. */ +void sort_heap (void *array, size_t count, size_t size, + algo_compare_func *compare, void *aux); + +/* ARRAY contains COUNT elements of SIZE bytes each. This + function tests whether ARRAY is a heap and returns 1 if so, 0 + otherwise. Uses COMPARE to compare elements, passing AUX as + auxiliary data. */ +int is_heap (const void *array, size_t count, size_t size, + algo_compare_func *compare, void *aux); + + #endif /* sort-algo.h */ diff --git a/src/command.c b/src/command.c index b386317a..b12e46d0 100644 --- a/src/command.c +++ b/src/command.c @@ -201,7 +201,8 @@ cmd_parse (void) /* If we're in a FILE TYPE structure, only certain commands can be allowed. */ - if (pgm_state == STATE_INPUT && vfm_source == &file_type_source + if (pgm_state == STATE_INPUT + && case_source_is_class (vfm_source, &file_type_source_class) && !FILE_TYPE_okay (cp)) return CMD_FAILURE; diff --git a/src/data-list.c b/src/data-list.c index a0036d6a..d0287b6a 100644 --- a/src/data-list.c +++ b/src/data-list.c @@ -50,15 +50,20 @@ /* Describes how to parse one variable. */ struct dls_var_spec { - struct dls_var_spec *next; + struct dls_var_spec *next; /* Next specification in list. */ + + /* Both free and fixed formats. */ + struct fmt_spec input; /* Input format of this field. */ struct variable *v; /* Associated variable. Used only in parsing. Not safe later. */ - char name[9]; /* Free-format: Name of variable. */ - int rec; /* Fixed-format: Record number (1-based). */ - int fc, lc; /* Fixed-format: Column numbers in record. */ - struct fmt_spec input; /* Input format of this field. */ int fv; /* First value in case. */ - int width; /* 0=numeric, >0=width of alpha field. */ + + /* Fixed format only. */ + int rec; /* Record number (1-based). */ + int fc, lc; /* Column numbers in record. */ + + /* Free format only. */ + char name[9]; /* Name of variable. */ }; /* Constants for DATA LIST type. */ @@ -74,7 +79,7 @@ enum struct data_list_pgm { struct trns_header h; - struct dls_var_spec *spec; /* Variable parsing specifications. */ + struct dls_var_spec *first, *last; /* Variable parsing specifications. */ struct file_handle *handle; /* Input file, never NULL. */ /* Do not reorder preceding fields. */ @@ -84,19 +89,12 @@ struct data_list_pgm int nrec; /* Number of records. */ }; -/* Holds information on parsing the data file. */ -static struct data_list_pgm dls; - -/* Pointer to a pointer to where the first dls_var_spec should go. */ -static struct dls_var_spec **first; - -/* Last dls_var_spec in the chain. Used for building the linked-list. */ -static struct dls_var_spec *next; - -static int parse_fixed (void); -static int parse_free (void); -static void dump_fixed_table (void); -static void dump_free_table (void); +static int parse_fixed (struct data_list_pgm *); +static int parse_free (struct dls_var_spec **, struct dls_var_spec **); +static void dump_fixed_table (const struct dls_var_spec *specs, + const struct file_handle *handle, int nrec); +static void dump_free_table (const struct data_list_pgm *); +static void destroy_dls_var_spec (struct dls_var_spec *); static void destroy_dls (struct trns_header *); static int read_one_case (struct trns_header *, struct ccase *); @@ -106,38 +104,40 @@ static int read_one_case (struct trns_header *, struct ccase *); int cmd_data_list (void) { + /* DATA LIST program under construction. */ + struct data_list_pgm *dls; + /* 0=print no table, 1=print table. (TABLE subcommand.) */ int table = -1; lex_match_id ("DATA"); lex_match_id ("LIST"); - if (vfm_source != &input_program_source - && vfm_source != &file_type_source) + if (!case_source_is_complex (vfm_source)) discard_variables (); - dls.handle = default_handle; - dls.type = -1; - dls.end = NULL; - dls.eof = 0; - dls.nrec = 0; - dls.spec = NULL; - next = NULL; - first = &dls.spec; + dls = xmalloc (sizeof *dls); + dls->handle = default_handle; + dls->type = -1; + dls->end = NULL; + dls->eof = 0; + dls->nrec = 0; + dls->first = dls->last = NULL; while (token != '/') { if (lex_match_id ("FILE")) { lex_match ('='); - dls.handle = fh_parse_file_handle (); - if (!dls.handle) - return CMD_FAILURE; - if (vfm_source == &file_type_source && dls.handle != default_handle) + dls->handle = fh_parse_file_handle (); + if (!dls->handle) + goto error; + if (case_source_is_class (vfm_source, &file_type_source_class) + && dls->handle != default_handle) { msg (SE, _("DATA LIST may not use a different file from " "that specified on its surrounding FILE TYPE.")); - return CMD_FAILURE; + goto error; } } else if (lex_match_id ("RECORDS")) @@ -145,25 +145,25 @@ cmd_data_list (void) lex_match ('='); lex_match ('('); if (!lex_force_int ()) - return CMD_FAILURE; - dls.nrec = lex_integer (); + goto error; + dls->nrec = lex_integer (); lex_get (); lex_match (')'); } else if (lex_match_id ("END")) { - if (dls.end) + if (dls->end) { msg (SE, _("The END subcommand may only be specified once.")); - return CMD_FAILURE; + goto error; } lex_match ('='); if (!lex_force_id ()) - return CMD_FAILURE; - dls.end = dict_lookup_var (default_dict, tokid); - if (!dls.end) - dls.end = dict_create_var_assert (default_dict, tokid, 0); + goto error; + dls->end = dict_lookup_var (default_dict, tokid); + if (!dls->end) + dls->end = dict_create_var_assert (default_dict, tokid, 0); lex_get (); } else if (token == T_ID) @@ -180,7 +180,7 @@ cmd_data_list (void) if (*p == NULL) { lex_error (NULL); - return CMD_FAILURE; + goto error; } lex_get (); @@ -188,14 +188,14 @@ cmd_data_list (void) index = p - id; if (index < 3) { - if (dls.type != -1) + if (dls->type != -1) { msg (SE, _("Only one of FIXED, FREE, or LIST may " "be specified.")); - return CMD_FAILURE; + goto error; } - dls.type = index; + dls->type = index; } else table = index - 3; @@ -203,65 +203,71 @@ cmd_data_list (void) else { lex_error (NULL); - return CMD_FAILURE; + goto error; } } - default_handle = dls.handle; + default_handle = dls->handle; - if (dls.type == -1) - dls.type = DLS_FIXED; + if (dls->type == -1) + dls->type = DLS_FIXED; if (table == -1) { - if (dls.type == DLS_FREE) + if (dls->type == DLS_FREE) table = 0; else table = 1; } - if (dls.type == DLS_FIXED) + if (dls->type == DLS_FIXED) { - if (!parse_fixed ()) - return CMD_FAILURE; + if (!parse_fixed (dls)) + goto error; if (table) - dump_fixed_table (); + dump_fixed_table (dls->first, dls->handle, dls->nrec); } else { - if (!parse_free ()) - return CMD_FAILURE; + if (!parse_free (&dls->first, &dls->last)) + goto error; if (table) - dump_free_table (); + dump_free_table (dls); } if (vfm_source != NULL) { struct data_list_pgm *new_pgm; - dls.h.proc = read_one_case; - dls.h.free = destroy_dls; + dls->h.proc = read_one_case; + dls->h.free = destroy_dls; new_pgm = xmalloc (sizeof *new_pgm); memcpy (new_pgm, &dls, sizeof *new_pgm); - add_transformation ((struct trns_header *) new_pgm); + add_transformation (&new_pgm->h); } - else - vfm_source = &data_list_source; + else + vfm_source = create_case_source (&data_list_source_class, dls); return CMD_SUCCESS; + + error: + destroy_dls_var_spec (dls->first); + free (dls); + return CMD_FAILURE; } static void -append_var_spec (struct dls_var_spec *spec) +append_var_spec (struct dls_var_spec **first, struct dls_var_spec **last, + struct dls_var_spec *spec) { - if (next == 0) - *first = next = xmalloc (sizeof *spec); - else - next = next->next = xmalloc (sizeof *spec); + spec->next = NULL; - memcpy (next, spec, sizeof *spec); - next->next = NULL; + if (*first == NULL) + *first = spec; + else + (*last)->next = spec; + *last = spec; } /* Fixed-format parsing. */ @@ -278,30 +284,27 @@ struct fmt_list /* Used as "local" variables among the fixed-format parsing funcs. If it were guaranteed that PSPP were going to be compiled by gcc, I'd make all these functions a single set of nested functions. */ -static struct +struct fixed_parsing_state { char **name; /* Variable names. */ - int nname; /* Number of names. */ - int cname; /* dump_fmt_list: index of next name to use. */ + int name_cnt; /* Number of names. */ int recno; /* Index of current record. */ int sc; /* 1-based column number of starting column for next field to output. */ + }; - struct dls_var_spec spec; /* Next specification to output. */ - int fc, lc; /* First, last column in set of fields specified - together. */ - - int level; /* Nesting level in fixed_parse_fortran(). */ - } -fx; - -static int fixed_parse_compatible (void); -static struct fmt_list *fixed_parse_fortran (void); +static int fixed_parse_compatible (struct fixed_parsing_state *, + struct dls_var_spec **, + struct dls_var_spec **); +static int fixed_parse_fortran (struct fixed_parsing_state *, + struct dls_var_spec **, + struct dls_var_spec **); static int -parse_fixed (void) +parse_fixed (struct data_list_pgm *dls) { + struct fixed_parsing_state fx; int i; fx.recno = 0; @@ -329,21 +332,18 @@ parse_fixed (void) } fx.sc = 1; } - fx.spec.rec = fx.recno; - if (!parse_DATA_LIST_vars (&fx.name, &fx.nname, PV_NONE)) + if (!parse_DATA_LIST_vars (&fx.name, &fx.name_cnt, PV_NONE)) return 0; if (token == T_NUM) { - if (!fixed_parse_compatible ()) + if (!fixed_parse_compatible (&fx, &dls->first, &dls->last)) goto fail; } else if (token == '(') { - fx.level = 0; - fx.cname = 0; - if (!fixed_parse_fortran ()) + if (!fixed_parse_fortran (&fx, &dls->first, &dls->last)) goto fail; } else @@ -353,18 +353,23 @@ parse_fixed (void) goto fail; } - for (i = 0; i < fx.nname; i++) + for (i = 0; i < fx.name_cnt; i++) free (fx.name[i]); free (fx.name); } - if (dls.nrec && next->rec > dls.nrec) + if (dls->first == NULL) + { + msg (SE, _("At least one variable must be specified.")); + return 0; + } + if (dls->nrec && dls->last->rec > dls->nrec) { msg (SE, _("Variables are specified on records that " "should not exist according to RECORDS subcommand.")); return 0; } - else if (!dls.nrec) - dls.nrec = next->rec; + else if (!dls->nrec) + dls->nrec = dls->last->rec; if (token != '.') { lex_error (_("expecting end of command")); @@ -373,41 +378,45 @@ parse_fixed (void) return 1; fail: - for (i = 0; i < fx.nname; i++) + for (i = 0; i < fx.name_cnt; i++) free (fx.name[i]); free (fx.name); return 0; } static int -fixed_parse_compatible (void) +fixed_parse_compatible (struct fixed_parsing_state *fx, + struct dls_var_spec **first, struct dls_var_spec **last) { - int dividend; + struct fmt_spec input; + int fc, lc; + int width; int i; + /* First column. */ if (!lex_force_int ()) return 0; - - fx.fc = lex_integer (); - if (fx.fc < 1) + fc = lex_integer (); + if (fc < 1) { msg (SE, _("Column positions for fields must be positive.")); return 0; } lex_get (); + /* Last column. */ lex_negative_to_dash (); if (lex_match ('-')) { if (!lex_force_int ()) return 0; - fx.lc = lex_integer (); - if (fx.lc < 1) + lc = lex_integer (); + if (lc < 1) { msg (SE, _("Column positions for fields must be positive.")); return 0; } - else if (fx.lc < fx.fc) + else if (lc < fc) { msg (SE, _("The ending column for a field must be " "greater than the starting column.")); @@ -417,9 +426,19 @@ fixed_parse_compatible (void) lex_get (); } else - fx.lc = fx.fc; + lc = fc; - fx.spec.input.w = fx.lc - fx.fc + 1; + /* Divide columns evenly. */ + input.w = (lc - fc + 1) / fx->name_cnt; + if ((lc - fc + 1) % fx->name_cnt) + { + msg (SE, _("The %d columns %d-%d " + "can't be evenly divided into %d fields."), + lc - fc + 1, fc, lc, fx->name_cnt); + return 0; + } + + /* Format specifier. */ if (lex_match ('(')) { struct fmt_desc *fdp; @@ -428,8 +447,8 @@ fixed_parse_compatible (void) { const char *cp; - fx.spec.input.type = parse_format_specifier_name (&cp, 0); - if (fx.spec.input.type == -1) + input.type = parse_format_specifier_name (&cp, 0); + if (input.type == -1) return 0; if (*cp) { @@ -442,7 +461,7 @@ fixed_parse_compatible (void) lex_match (','); } else - fx.spec.input.type = FMT_F; + input.type = FMT_F; if (lex_integer_p ()) { @@ -453,102 +472,88 @@ fixed_parse_compatible (void) return 0; } - fx.spec.input.d = lex_integer (); + input.d = lex_integer (); lex_get (); } else - fx.spec.input.d = 0; + input.d = 0; - fdp = &formats[fx.spec.input.type]; - if (fdp->n_args < 2 && fx.spec.input.d) + fdp = &formats[input.type]; + if (fdp->n_args < 2 && input.d) { msg (SE, _("Input format %s doesn't accept decimal places."), fdp->name); return 0; } - if (fx.spec.input.d > 16) - fx.spec.input.d = 16; + if (input.d > 16) + input.d = 16; if (!lex_force_match (')')) return 0; } else { - fx.spec.input.type = FMT_F; - fx.spec.input.d = 0; + input.type = FMT_F; + input.d = 0; } + if (!check_input_specifier (&input)) + return 0; - fx.sc = fx.lc + 1; - - if ((fx.lc - fx.fc + 1) % fx.nname) - { - msg (SE, _("The %d columns %d-%d " - "can't be evenly divided into %d fields."), - fx.lc - fx.fc + 1, fx.fc, fx.lc, fx.nname); - return 0; - } + /* Start column for next specification. */ + fx->sc = lc + 1; - dividend = (fx.lc - fx.fc + 1) / fx.nname; - fx.spec.input.w = dividend; - if (!check_input_specifier (&fx.spec.input)) - return 0; + /* Width of variables to create. */ + if (input.type == FMT_A || input.type == FMT_AHEX) + width = input.w; + else + width = 0; - for (i = 0; i < fx.nname; i++) + /* Create variables and var specs. */ + for (i = 0; i < fx->name_cnt; i++) { - int type; - int width; + struct dls_var_spec *spec; struct variable *v; - if (fx.spec.input.type == FMT_A || fx.spec.input.type == FMT_AHEX) - { - type = ALPHA; - width = dividend; - } - else - { - type = NUMERIC; - width = 0; - } - - v = dict_create_var (default_dict, fx.name[i], width); - if (v) + v = dict_create_var (default_dict, fx->name[i], width); + if (v != NULL) { - convert_fmt_ItoO (&fx.spec.input, &v->print); + convert_fmt_ItoO (&input, &v->print); v->write = v->print; - if (vfm_source != &input_program_source - && vfm_source != &file_type_source) + if (!case_source_is_complex (vfm_source)) v->init = 0; } else { - v = dict_lookup_var_assert (default_dict, fx.name[i]); - if (!vfm_source) + v = dict_lookup_var_assert (default_dict, fx->name[i]); + if (vfm_source == NULL) { - msg (SE, _("%s is a duplicate variable name."), fx.name[i]); + msg (SE, _("%s is a duplicate variable name."), fx->name[i]); return 0; } - if (type != v->type) + if ((width != 0) != (v->width != 0)) { msg (SE, _("There is already a variable %s of a " "different type."), - fx.name[i]); + fx->name[i]); return 0; } - if (type == ALPHA && dividend != v->width) + if (width != 0 && width != v->width) { msg (SE, _("There is already a string variable %s of a " - "different width."), fx.name[i]); + "different width."), fx->name[i]); return 0; } } - fx.spec.v = v; - fx.spec.fc = fx.fc + dividend * i; - fx.spec.lc = fx.spec.fc + dividend - 1; - fx.spec.fv = v->fv; - fx.spec.width = v->width; - append_var_spec (&fx.spec); + spec = xmalloc (sizeof *spec); + spec->input = input; + spec->v = v; + spec->fv = v->fv; + spec->rec = fx->recno; + spec->fc = fc + input.w * i; + spec->lc = spec->fc + input.w - 1; + append_var_spec (first, last, spec); } return 1; } @@ -570,149 +575,164 @@ destroy_fmt_list (struct fmt_list *f, int recurse) /* Takes a hierarchically structured fmt_list F as constructed by fixed_parse_fortran(), and flattens it into a linear list of - dls_var_spec's. */ + dls_var_spec's. NAME_IDX is used to take values from the list + of names in FX; it should initially point to a value of 0. */ static int -dump_fmt_list (struct fmt_list *f) +dump_fmt_list (struct fixed_parsing_state *fx, struct fmt_list *f, + struct dls_var_spec **first, struct dls_var_spec **last, + int *name_idx) { int i; for (; f; f = f->next) if (f->f.type == FMT_X) - fx.sc += f->count; + fx->sc += f->count; else if (f->f.type == FMT_T) - fx.sc = f->f.w; + fx->sc = f->f.w; else if (f->f.type == FMT_NEWREC) { - fx.recno += f->count; - fx.sc = 1; + fx->recno += f->count; + fx->sc = 1; } else for (i = 0; i < f->count; i++) if (f->f.type == FMT_DESCEND) { - if (!dump_fmt_list (f->down)) + if (!dump_fmt_list (fx, f->down, first, last, name_idx)) return 0; } else { - int type; + struct dls_var_spec *spec; int width; struct variable *v; if (formats[f->f.type].cat & FCAT_STRING) - { - type = ALPHA; - width = f->f.w; - } - else - { - type = NUMERIC; - width = 0; - } - if (fx.cname >= fx.nname) + width = f->f.w; + else + width = 0; + if (*name_idx >= fx->name_cnt) { msg (SE, _("The number of format " - "specifications exceeds the number of " - "variable names given.")); + "specifications exceeds the given number of " + "variable names.")); return 0; } - fx.spec.v = v = dict_create_var (default_dict, - fx.name[fx.cname++], - width); + v = dict_create_var (default_dict, fx->name[(*name_idx)++], width); if (!v) { - msg (SE, _("%s is a duplicate variable name."), fx.name[i]); + msg (SE, _("%s is a duplicate variable name."), fx->name[i]); return 0; } - if (vfm_source != &input_program_source - && vfm_source != &file_type_source) + if (!case_source_is_complex (vfm_source)) v->init = 0; - fx.spec.input = f->f; - convert_fmt_ItoO (&fx.spec.input, &v->print); - v->write = v->print; + spec = xmalloc (sizeof *spec); + spec->v = v; + spec->input = f->f; + spec->fv = v->fv; + spec->rec = fx->recno; + spec->fc = fx->sc; + spec->lc = fx->sc + f->f.w - 1; + append_var_spec (first, last, spec); - fx.spec.rec = fx.recno; - fx.spec.fc = fx.sc; - fx.spec.lc = fx.sc + f->f.w - 1; - fx.spec.fv = v->fv; - fx.spec.width = v->width; - append_var_spec (&fx.spec); + convert_fmt_ItoO (&spec->input, &v->print); + v->write = v->print; - fx.sc += f->f.w; + fx->sc += f->f.w; } return 1; } -/* Calls itself recursively to parse nested levels of parentheses. - Returns to its original caller: NULL, to indicate error; non-NULL, - but nothing useful, to indicate success (it returns a free()'d - block). */ +/* Recursively parses a FORTRAN-like format specification. LEVEL + is the level of recursion, starting from 0. Returns the + parsed specification if successful, or a null pointer on + failure. */ static struct fmt_list * -fixed_parse_fortran (void) +fixed_parse_fortran_internal (struct fixed_parsing_state *fx, + struct dls_var_spec **first, + struct dls_var_spec **last) { - struct fmt_list *head; - struct fmt_list *fl = NULL; + struct fmt_list *head = NULL; + struct fmt_list *tail = NULL; - lex_get (); /* Skip opening parenthesis. */ + lex_force_match ('('); while (token != ')') { - if (fl) - fl = fl->next = xmalloc (sizeof *fl); + /* New fmt_list. */ + struct fmt_list *new = xmalloc (sizeof *new); + new->next = NULL; + + /* Append new to list. */ + if (head != NULL) + tail->next = new; else - head = fl = xmalloc (sizeof *fl); + head = new; + tail = new; + /* Parse count. */ if (lex_integer_p ()) { - fl->count = lex_integer (); + new->count = lex_integer (); lex_get (); } else - fl->count = 1; + new->count = 1; + /* Parse format specifier. */ if (token == '(') { - fl->f.type = FMT_DESCEND; - fx.level++; - fl->down = fixed_parse_fortran (); - fx.level--; - if (!fl->down) + new->f.type = FMT_DESCEND; + new->down = fixed_parse_fortran_internal (fx, first, last); + if (new->down == NULL) goto fail; } else if (lex_match ('/')) - fl->f.type = FMT_NEWREC; - else if (!parse_format_specifier (&fl->f, 1) - || !check_input_specifier (&fl->f)) + new->f.type = FMT_NEWREC; + else if (!parse_format_specifier (&new->f, 1) + || !check_input_specifier (&new->f)) goto fail; lex_match (','); } - fl->next = NULL; - lex_get (); - - if (fx.level) - return head; + lex_force_match (')'); - fl->next = NULL; - dump_fmt_list (head); - if (fx.cname < fx.nname) - { - msg (SE, _("There aren't enough format specifications " - "to match the number of variable names given.")); - goto fail; - } - destroy_fmt_list (head, 1); return head; fail: - fl->next = NULL; destroy_fmt_list (head, 0); return NULL; } +/* Parses a FORTRAN-like format specification. Returns nonzero + if successful. */ +static int +fixed_parse_fortran (struct fixed_parsing_state *fx, + struct dls_var_spec **first, struct dls_var_spec **last) +{ + struct fmt_list *list; + int name_idx; + + list = fixed_parse_fortran_internal (fx, first, last); + if (list == NULL) + return 0; + + name_idx = 0; + dump_fmt_list (fx, list, first, last, &name_idx); + destroy_fmt_list (list, 1); + if (name_idx < fx->name_cnt) + { + msg (SE, _("There aren't enough format specifications " + "to match the number of variable names given.")); + return 0; + } + + return 1; +} + /* Displays a table giving information on fixed-format variable parsing on DATA LIST. */ /* FIXME: The `Columns' column should be divided into three columns, @@ -720,15 +740,16 @@ fail: column; then right-justify the starting column and left-justify the ending column. */ static void -dump_fixed_table (void) +dump_fixed_table (const struct dls_var_spec *specs, + const struct file_handle *handle, int nrec) { - struct dls_var_spec *spec; + const struct dls_var_spec *spec; struct tab_table *t; char *buf; const char *filename; int i; - for (i = 0, spec = *first; spec; spec = spec->next) + for (i = 0, spec = specs; spec; spec = spec->next) i++; t = tab_create (4, i + 1, 0); tab_columns (t, TAB_COL_DOWN, 1); @@ -741,7 +762,7 @@ dump_fixed_table (void) tab_hline (t, TAL_2, 0, 3, 1); tab_dim (t, tab_natural_dimensions); - for (i = 1, spec = *first; spec; spec = spec->next, i++) + for (i = 1, spec = specs; spec; spec = spec->next, i++) { tab_text (t, 0, i, TAB_LEFT, spec->v->name); tab_text (t, 1, i, TAT_PRINTF, "%d", spec->rec); @@ -751,27 +772,17 @@ dump_fixed_table (void) fmt_to_string (&spec->input)); } - if (*first == dls.spec) - { - filename = fh_handle_name (dls.handle); - if (filename == NULL) - filename = ""; - buf = local_alloc (strlen (filename) + INT_DIGITS + 80); - sprintf (buf, (dls.handle != inline_file - ? - ngettext("Reading %d record from file %s.", - "Reading %d records from file %s.",dls.nrec) - : - ngettext("Reading %d record from the command file.", - "Reading %d records from the command file.", - dls.nrec)), - dls.nrec, filename); - } - else - { - buf = local_alloc (strlen (_("Occurrence data specifications.")) + 1); - strcpy (buf, _("Occurrence data specifications.")); - } + filename = fh_handle_name (handle); + if (filename == NULL) + filename = ""; + buf = local_alloc (strlen (filename) + INT_DIGITS + 80); + sprintf (buf, (handle != inline_file + ? ngettext ("Reading %d record from file %s.", + "Reading %d records from file %s.", nrec) + : ngettext ("Reading %d record from the command file.", + "Reading %d records from the command file.", + nrec)), + nrec, filename); tab_title (t, 0, buf); tab_submit (t); @@ -782,66 +793,69 @@ dump_fixed_table (void) /* Free-format parsing. */ static int -parse_free (void) +parse_free (struct dls_var_spec **first, struct dls_var_spec **last) { - struct dls_var_spec spec; - struct fmt_spec in, out; - char **name; - int nname; - int i; - lex_get (); while (token != '.') { + struct fmt_spec input, output; + char **name; + int name_cnt; int width; + int i; - if (!parse_DATA_LIST_vars (&name, &nname, PV_NONE)) + if (!parse_DATA_LIST_vars (&name, &name_cnt, PV_NONE)) return 0; if (lex_match ('(')) { - if (!parse_format_specifier (&in, 0) || !check_input_specifier (&in)) - goto fail; - if (!lex_force_match (')')) - goto fail; - convert_fmt_ItoO (&in, &out); + if (!parse_format_specifier (&input, 0) + || !check_input_specifier (&input) + || !lex_force_match (')')) + { + for (i = 0; i < name_cnt; i++) + free (name[i]); + free (name); + return 0; + } + convert_fmt_ItoO (&input, &output); } else { lex_match ('*'); - in.type = FMT_F; - in.w = 8; - in.d = 0; - out = set_format; + input.type = FMT_F; + input.w = 8; + input.d = 0; + output = set_format; } - spec.input = in; - if (in.type == FMT_A || in.type == FMT_AHEX) - width = in.w; + if (input.type == FMT_A || input.type == FMT_AHEX) + width = input.w; else width = 0; - for (i = 0; i < nname; i++) + for (i = 0; i < name_cnt; i++) { + struct dls_var_spec *spec; struct variable *v; - spec.v = v = dict_create_var (default_dict, name[i], width); + v = dict_create_var (default_dict, name[i], width); if (!v) { msg (SE, _("%s is a duplicate variable name."), name[i]); return 0; } - - v->print = v->write = out; + v->print = v->write = output; - if (vfm_source != &input_program_source - && vfm_source != &file_type_source) + if (!case_source_is_complex (vfm_source)) v->init = 0; - strcpy (spec.name, name[i]); - spec.fv = v->fv; - spec.width = width; - append_var_spec (&spec); + spec = xmalloc (sizeof *spec); + spec->input = input; + spec->v = v; + spec->fv = v->fv; + strcpy (spec->name, name[i]); + append_var_spec (first, last, spec); } - for (i = 0; i < nname; i++) + for (i = 0; i < name_cnt; i++) free (name[i]); free (name); } @@ -849,25 +863,19 @@ parse_free (void) if (token != '.') lex_error (_("expecting end of command")); return 1; - -fail: - for (i = 0; i < nname; i++) - free (name[i]); - free (name); - return 0; } /* Displays a table giving information on free-format variable parsing on DATA LIST. */ static void -dump_free_table (void) +dump_free_table (const struct data_list_pgm *dls) { struct tab_table *t; int i; { struct dls_var_spec *spec; - for (i = 0, spec = dls.spec; spec; spec = spec->next) + for (i = 0, spec = dls->first; spec; spec = spec->next) i++; } @@ -883,7 +891,7 @@ dump_free_table (void) { struct dls_var_spec *spec; - for (i = 1, spec = dls.spec; spec; spec = spec->next, i++) + for (i = 1, spec = dls->first; spec; spec = spec->next, i++) { tab_text (t, 0, i, TAB_LEFT, spec->v->name); tab_text (t, 1, i, TAB_LEFT | TAT_FIX, fmt_to_string (&spec->input)); @@ -893,11 +901,11 @@ dump_free_table (void) { const char *filename; - filename = fh_handle_name (dls.handle); + filename = fh_handle_name (dls->handle); if (filename == NULL) filename = ""; tab_title (t, 1, - (dls.handle != inline_file + (dls->handle != inline_file ? _("Reading free-form data from file %s.") : _("Reading free-form data from the command file.")), filename); @@ -909,10 +917,6 @@ dump_free_table (void) /* Input procedure. */ -/* Pointer to relevant parsing data. Static just to avoid passing it - around so much. */ -static struct data_list_pgm *dlsp; - /* Extracts a field from the current position in the current record. Fields can be unquoted or quoted with single- or double-quote characters. *RET_LEN is set to the field length, *RET_CP is set to @@ -921,12 +925,12 @@ static struct data_list_pgm *dlsp; failure or a 1-based column number indicating the beginning of the field on success. */ static int -cut_field (char **ret_cp, int *ret_len) +cut_field (const struct data_list_pgm *dls, char **ret_cp, int *ret_len) { char *cp, *ep; int len; - cp = dfm_get_record (dlsp->handle, &len); + cp = dfm_get_record (dls->handle, &len); if (!cp) return 0; @@ -963,112 +967,65 @@ cut_field (char **ret_cp, int *ret_len) { int beginning_column; - dfm_set_record (dlsp->handle, *ret_cp); - beginning_column = dfm_get_cur_col (dlsp->handle) + 1; + dfm_set_record (dls->handle, *ret_cp); + beginning_column = dfm_get_cur_col (dls->handle) + 1; - dfm_set_record (dlsp->handle, cp); + dfm_set_record (dls->handle, cp); return beginning_column; } } -static int read_from_data_list_fixed (void); -static int read_from_data_list_free (void); -static int read_from_data_list_list (void); +typedef int data_list_read_func (const struct data_list_pgm *); +static data_list_read_func read_from_data_list_fixed; +static data_list_read_func read_from_data_list_free; +static data_list_read_func read_from_data_list_list; -/* FLAG==0: reads any number of cases into temp_case and calls - write_case() for each one, returns garbage. FLAG!=0: reads one - case into temp_case and returns -2 on eof, -1 otherwise. - Uses dlsp as the relevant parsing description. */ -static int -do_reading (int flag, write_case_func *write_case, write_case_data wc_data) +/* Returns the proper function to read the kind of DATA LIST + data specified by DLS. */ +static data_list_read_func * +get_data_list_read_func (const struct data_list_pgm *dls) { - int (*func) (void); - - int code; - - dfm_push (dlsp->handle); - - switch (dlsp->type) + switch (dls->type) { case DLS_FIXED: - func = read_from_data_list_fixed; + return read_from_data_list_fixed; break; + case DLS_FREE: - func = read_from_data_list_free; + return read_from_data_list_free; break; + case DLS_LIST: - func = read_from_data_list_list; + return read_from_data_list_list; break; + default: assert (0); } - if (flag) - { - code = func (); - if (code == -2) - { - if (dlsp->eof == 1) - { - msg (SE, _("Attempt to read past end of file.")); - err_failure (); - return -2; - } - dlsp->eof = 1; - } - else - dlsp->eof = 0; - - if (dlsp->end != NULL) - { - if (code == -2) - { - printf ("end of file, setting %s to 1\n", dlsp->end->name); - temp_case->data[dlsp->end->fv].f = 1.0; - code = -1; - } - else - { - printf ("not end of file, setting %s to 0\n", dlsp->end->name); - temp_case->data[dlsp->end->fv].f = 0.0; - } - } - } - else - { - while (func () != -2) - if (!write_case (wc_data)) - { - debug_printf ((_("abort in write_case()\n"))); - break; - } - fh_close_handle (dlsp->handle); - } - dfm_pop (dlsp->handle); - - return code; } /* Reads a case from the data file and parses it according to - fixed-format syntax rules. */ + fixed-format syntax rules. Returns -1 on success, -2 at end + of file. */ static int -read_from_data_list_fixed (void) +read_from_data_list_fixed (const struct data_list_pgm *dls) { - struct dls_var_spec *var_spec = dlsp->spec; + struct dls_var_spec *var_spec = dls->first; int i; - if (!dfm_get_record (dlsp->handle, NULL)) + if (!dfm_get_record (dls->handle, NULL)) return -2; - for (i = 1; i <= dlsp->nrec; i++) + for (i = 1; i <= dls->nrec; i++) { int len; - char *line = dfm_get_record (dlsp->handle, &len); + char *line = dfm_get_record (dls->handle, &len); if (!line) { /* Note that this can't occur on the first record. */ msg (SW, _("Partial case of %d of %d records discarded."), - i - 1, dlsp->nrec); + i - 1, dls->nrec); return -2; } @@ -1085,37 +1042,38 @@ read_from_data_list_fixed (void) data_in (&di); } - dfm_fwd_record (dlsp->handle); + dfm_fwd_record (dls->handle); } return -1; } /* Reads a case from the data file and parses it according to - free-format syntax rules. */ + free-format syntax rules. Returns -1 on success, -2 at end of + file. */ static int -read_from_data_list_free (void) +read_from_data_list_free (const struct data_list_pgm *dls) { struct dls_var_spec *var_spec; char *field; int len; - for (var_spec = dlsp->spec; var_spec; var_spec = var_spec->next) + for (var_spec = dls->first; var_spec; var_spec = var_spec->next) { int column; /* Cut out a field and read in a new record if necessary. */ for (;;) { - column = cut_field (&field, &len); + column = cut_field (dls, &field, &len); if (column != 0) break; - if (dfm_get_record (dlsp->handle, NULL)) - dfm_fwd_record (dlsp->handle); - if (!dfm_get_record (dlsp->handle, NULL)) + if (dfm_get_record (dls->handle, NULL)) + dfm_fwd_record (dls->handle); + if (!dfm_get_record (dls->handle, NULL)) { - if (var_spec != dlsp->spec) + if (var_spec != dls->first) msg (SW, _("Partial case discarded. The first variable " "missing was %s."), var_spec->name); return -2; @@ -1138,21 +1096,22 @@ read_from_data_list_free (void) } /* Reads a case from the data file and parses it according to - list-format syntax rules. */ + list-format syntax rules. Returns -1 on success, -2 at end of + file. */ static int -read_from_data_list_list (void) +read_from_data_list_list (const struct data_list_pgm *dls) { struct dls_var_spec *var_spec; char *field; int len; - if (!dfm_get_record (dlsp->handle, NULL)) + if (!dfm_get_record (dls->handle, NULL)) return -2; - for (var_spec = dlsp->spec; var_spec; var_spec = var_spec->next) + for (var_spec = dls->first; var_spec; var_spec = var_spec->next) { /* Cut out a field and check for end-of-line. */ - int column = cut_field (&field, &len); + int column = cut_field (dls, &field, &len); if (column == 0) { @@ -1161,11 +1120,14 @@ read_from_data_list_list (void) "These will be filled with the system-missing value " "or blanks, as appropriate."), var_spec->name); - for (; var_spec; var_spec = var_spec->next) - if (var_spec->width == 0) - temp_case->data[var_spec->fv].f = SYSMIS; - else - memset (temp_case->data[var_spec->fv].s, ' ', var_spec->width); + for (; var_spec; var_spec = var_spec->next) + { + int width = get_format_var_width (&var_spec->input); + if (width == 0) + temp_case->data[var_spec->fv].f = SYSMIS; + else + memset (temp_case->data[var_spec->fv].s, ' ', width); + } break; } @@ -1182,7 +1144,7 @@ read_from_data_list_list (void) } } - dfm_fwd_record (dlsp->handle); + dfm_fwd_record (dls->handle); return -1; } @@ -1205,8 +1167,9 @@ static void destroy_dls (struct trns_header *pgm) { struct data_list_pgm *dls = (struct data_list_pgm *) pgm; - destroy_dls_var_spec (dls->spec); + destroy_dls_var_spec (dls->first); fh_close_handle (dls->handle); + free (pgm); } /* Note that since this is exclusively an input program, C is @@ -1214,35 +1177,81 @@ destroy_dls (struct trns_header *pgm) static int read_one_case (struct trns_header *t, struct ccase *c UNUSED) { - dlsp = (struct data_list_pgm *) t; - return do_reading (1, NULL, NULL); + struct data_list_pgm *dls = (struct data_list_pgm *) t; + data_list_read_func *read_func; + int retval; + + dfm_push (dls->handle); + + read_func = get_data_list_read_func (dls); + retval = read_func (dls); + + /* Handle end of file. */ + if (retval == -2) + { + /* If we already encountered end of file then this is an + error. */ + if (dls->eof == 1) + { + msg (SE, _("Attempt to read past end of file.")); + err_failure (); + dfm_pop (dls->handle); + return -2; + } + + /* Otherwise simply note it. */ + dls->eof = 1; + } + else + dls->eof = 0; + + /* If there was an END subcommand handle it. */ + if (dls->end != NULL) + { + if (retval == -2) + { + temp_case->data[dls->end->fv].f = 1.0; + retval = -1; + } + else + temp_case->data[dls->end->fv].f = 0.0; + } + + dfm_pop (dls->handle); + + return retval; } /* Reads all the records from the data file and passes them to write_case(). */ static void -data_list_source_read (write_case_func *write_case, write_case_data wc_data) +data_list_source_read (struct case_source *source, + write_case_func *write_case, write_case_data wc_data) { - dlsp = &dls; - do_reading (0, write_case, wc_data); + struct data_list_pgm *dls = source->aux; + data_list_read_func *read_func = get_data_list_read_func (dls); + + dfm_push (dls->handle); + while (read_func (dls) != -2) + if (!write_case (wc_data)) + break; + dfm_pop (dls->handle); + + fh_close_handle (dls->handle); } /* Destroys the source's internal data. */ static void -data_list_source_destroy_source (void) +data_list_source_destroy (struct case_source *source) { - destroy_dls (&dls.h); + destroy_dls (source->aux); } -struct case_stream data_list_source = +const struct case_source_class data_list_source_class = { - NULL, - data_list_source_read, - NULL, - NULL, - data_list_source_destroy_source, - NULL, "DATA LIST", + data_list_source_read, + data_list_source_destroy, }; /* REPEATING DATA. */ @@ -1258,7 +1267,7 @@ struct rpd_num_or_var struct repeating_data_trns { struct trns_header h; - struct dls_var_spec *spec; /* Variable parsing specifications. */ + struct dls_var_spec *first, *last; /* Variable parsing specifications. */ struct file_handle *handle; /* Input file, never NULL. */ struct rpd_num_or_var starts_beg; /* STARTS=, before the dash. */ @@ -1278,13 +1287,11 @@ struct repeating_data_trns write_case_data wc_data; }; -/* Information about the transformation being parsed. */ -static struct repeating_data_trns rpd; - int repeating_data_trns_proc (struct trns_header *, struct ccase *); void repeating_data_trns_free (struct trns_header *); static int parse_num_or_var (struct rpd_num_or_var *, const char *); -static int parse_repeating_data (void); +static int parse_repeating_data (struct dls_var_spec **, + struct dls_var_spec **); static void find_variable_input_spec (struct variable *v, struct fmt_spec *spec); @@ -1292,6 +1299,8 @@ static void find_variable_input_spec (struct variable *v, int cmd_repeating_data (void) { + struct repeating_data_trns *rpd; + /* 0=print no table, 1=print table. (TABLE subcommand.) */ int table = 1; @@ -1301,20 +1310,18 @@ cmd_repeating_data (void) lex_match_id ("REPEATING"); lex_match_id ("DATA"); - assert (vfm_source == &input_program_source - || vfm_source == &file_type_source); - - rpd.handle = default_handle; - rpd.starts_beg.num = 0; - rpd.starts_beg.var = NULL; - rpd.starts_end = rpd.occurs = rpd.length = rpd.cont_beg - = rpd.cont_end = rpd.starts_beg; - rpd.id_beg = rpd.id_end = 0; - rpd.id_var = NULL; - rpd.id_value = NULL; - rpd.spec = NULL; - first = &rpd.spec; - next = NULL; + assert (case_source_is_complex (vfm_source)); + + rpd = xmalloc (sizeof *rpd); + rpd->handle = default_handle; + rpd->first = rpd->last = NULL; + rpd->starts_beg.num = 0; + rpd->starts_beg.var = NULL; + rpd->starts_end = rpd->occurs = rpd->length = rpd->cont_beg + = rpd->cont_end = rpd->starts_beg; + rpd->id_beg = rpd->id_end = 0; + rpd->id_var = NULL; + rpd->id_value = NULL; lex_match ('/'); @@ -1323,14 +1330,14 @@ cmd_repeating_data (void) if (lex_match_id ("FILE")) { lex_match ('='); - rpd.handle = fh_parse_file_handle (); - if (!rpd.handle) - return CMD_FAILURE; - if (rpd.handle != default_handle) + rpd->handle = fh_parse_file_handle (); + if (!rpd->handle) + goto error; + if (rpd->handle != default_handle) { msg (SE, _("REPEATING DATA must use the same file as its " "corresponding DATA LIST or FILE TYPE.")); - return CMD_FAILURE; + goto error; } } else if (lex_match_id ("STARTS")) @@ -1339,33 +1346,33 @@ cmd_repeating_data (void) if (seen & 1) { msg (SE, _("%s subcommand given multiple times."),"STARTS"); - return CMD_FAILURE; + goto error; } seen |= 1; - if (!parse_num_or_var (&rpd.starts_beg, "STARTS beginning column")) - return CMD_FAILURE; + if (!parse_num_or_var (&rpd->starts_beg, "STARTS beginning column")) + goto error; lex_negative_to_dash (); if (lex_match ('-')) { - if (!parse_num_or_var (&rpd.starts_end, "STARTS ending column")) - return CMD_FAILURE; + if (!parse_num_or_var (&rpd->starts_end, "STARTS ending column")) + goto error; } else { - /* Otherwise, rpd.starts_end is left uninitialized. + /* Otherwise, rpd->starts_end is left uninitialized. This is okay. We will initialize it later from the record length of the file. We can't do this now because we can't be sure that the user has specified the file handle yet. */ } - if (rpd.starts_beg.num != 0 && rpd.starts_end.num != 0 - && rpd.starts_beg.num > rpd.starts_end.num) + if (rpd->starts_beg.num != 0 && rpd->starts_end.num != 0 + && rpd->starts_beg.num > rpd->starts_end.num) { msg (SE, _("STARTS beginning column (%d) exceeds " "STARTS ending column (%d)."), - rpd.starts_beg.num, rpd.starts_end.num); - return CMD_FAILURE; + rpd->starts_beg.num, rpd->starts_end.num); + goto error; } } else if (lex_match_id ("OCCURS")) @@ -1374,12 +1381,12 @@ cmd_repeating_data (void) if (seen & 2) { msg (SE, _("%s subcommand given multiple times."),"OCCURS"); - return CMD_FAILURE; + goto error; } seen |= 2; - if (!parse_num_or_var (&rpd.occurs, "OCCURS")) - return CMD_FAILURE; + if (!parse_num_or_var (&rpd->occurs, "OCCURS")) + goto error; } else if (lex_match_id ("LENGTH")) { @@ -1387,12 +1394,12 @@ cmd_repeating_data (void) if (seen & 4) { msg (SE, _("%s subcommand given multiple times."),"LENGTH"); - return CMD_FAILURE; + goto error; } seen |= 4; - if (!parse_num_or_var (&rpd.length, "LENGTH")) - return CMD_FAILURE; + if (!parse_num_or_var (&rpd->length, "LENGTH")) + goto error; } else if (lex_match_id ("CONTINUED")) { @@ -1400,32 +1407,32 @@ cmd_repeating_data (void) if (seen & 8) { msg (SE, _("%s subcommand given multiple times."),"CONTINUED"); - return CMD_FAILURE; + goto error; } seen |= 8; if (!lex_match ('/')) { - if (!parse_num_or_var (&rpd.cont_beg, "CONTINUED beginning column")) - return CMD_FAILURE; + if (!parse_num_or_var (&rpd->cont_beg, "CONTINUED beginning column")) + goto error; lex_negative_to_dash (); if (lex_match ('-') - && !parse_num_or_var (&rpd.cont_end, + && !parse_num_or_var (&rpd->cont_end, "CONTINUED ending column")) - return CMD_FAILURE; + goto error; - if (rpd.cont_beg.num != 0 && rpd.cont_end.num != 0 - && rpd.cont_beg.num > rpd.cont_end.num) + if (rpd->cont_beg.num != 0 && rpd->cont_end.num != 0 + && rpd->cont_beg.num > rpd->cont_end.num) { msg (SE, _("CONTINUED beginning column (%d) exceeds " "CONTINUED ending column (%d)."), - rpd.cont_beg.num, rpd.cont_end.num); - return CMD_FAILURE; + rpd->cont_beg.num, rpd->cont_end.num); + goto error; } } else - rpd.cont_beg.num = 1; + rpd->cont_beg.num = 1; } else if (lex_match_id ("ID")) { @@ -1433,19 +1440,19 @@ cmd_repeating_data (void) if (seen & 16) { msg (SE, _("%s subcommand given multiple times."),"ID"); - return CMD_FAILURE; + goto error; } seen |= 16; if (!lex_force_int ()) - return CMD_FAILURE; + goto error; if (lex_integer () < 1) { msg (SE, _("ID beginning column (%ld) must be positive."), lex_integer ()); - return CMD_FAILURE; + goto error; } - rpd.id_beg = lex_integer (); + rpd->id_beg = lex_integer (); lex_get (); lex_negative_to_dash (); @@ -1453,34 +1460,34 @@ cmd_repeating_data (void) if (lex_match ('-')) { if (!lex_force_int ()) - return CMD_FAILURE; + goto error; if (lex_integer () < 1) { msg (SE, _("ID ending column (%ld) must be positive."), lex_integer ()); - return CMD_FAILURE; + goto error; } - if (lex_integer () < rpd.id_end) + if (lex_integer () < rpd->id_end) { msg (SE, _("ID ending column (%ld) cannot be less than " "ID beginning column (%d)."), - lex_integer (), rpd.id_beg); - return CMD_FAILURE; + lex_integer (), rpd->id_beg); + goto error; } - rpd.id_end = lex_integer (); + rpd->id_end = lex_integer (); lex_get (); } - else rpd.id_end = rpd.id_beg; + else rpd->id_end = rpd->id_beg; if (!lex_force_match ('=')) - return CMD_FAILURE; - rpd.id_var = parse_variable (); - if (rpd.id_var == NULL) - return CMD_FAILURE; + goto error; + rpd->id_var = parse_variable (); + if (rpd->id_var == NULL) + goto error; - find_variable_input_spec (rpd.id_var, &rpd.id_spec); - rpd.id_value = xmalloc (sizeof *rpd.id_value * rpd.id_var->nv); + find_variable_input_spec (rpd->id_var, &rpd->id_spec); + rpd->id_value = xmalloc (sizeof *rpd->id_value * rpd->id_var->nv); } else if (lex_match_id ("TABLE")) table = 1; @@ -1491,11 +1498,11 @@ cmd_repeating_data (void) else { lex_error (NULL); - return CMD_FAILURE; + goto error; } if (!lex_force_match ('/')) - return CMD_FAILURE; + goto error; } /* Comes here when DATA specification encountered. */ @@ -1505,47 +1512,47 @@ cmd_repeating_data (void) msg (SE, _("Missing required specification STARTS.")); if ((seen & 2) == 0) msg (SE, _("Missing required specification OCCURS.")); - return CMD_FAILURE; + goto error; } /* Enforce ID restriction. */ if ((seen & 16) && !(seen & 8)) { msg (SE, _("ID specified without CONTINUED.")); - return CMD_FAILURE; + goto error; } /* Calculate starts_end, cont_end if necessary. */ - if (rpd.starts_end.num == 0 && rpd.starts_end.var == NULL) - rpd.starts_end.num = fh_record_width (rpd.handle); - if (rpd.cont_end.num == 0 && rpd.starts_end.var == NULL) - rpd.cont_end.num = fh_record_width (rpd.handle); + if (rpd->starts_end.num == 0 && rpd->starts_end.var == NULL) + rpd->starts_end.num = fh_record_width (rpd->handle); + if (rpd->cont_end.num == 0 && rpd->starts_end.var == NULL) + rpd->cont_end.num = fh_record_width (rpd->handle); /* Calculate length if possible. */ if ((seen & 4) == 0) { struct dls_var_spec *iter; - for (iter = rpd.spec; iter; iter = iter->next) + for (iter = rpd->first; iter; iter = iter->next) { - if (iter->lc > rpd.length.num) - rpd.length.num = iter->lc; + if (iter->lc > rpd->length.num) + rpd->length.num = iter->lc; } - assert (rpd.length.num != 0); + assert (rpd->length.num != 0); } lex_match ('='); - if (!parse_repeating_data ()) - return CMD_FAILURE; + if (!parse_repeating_data (&rpd->first, &rpd->last)) + goto error; if (table) - dump_fixed_table (); + dump_fixed_table (rpd->first, rpd->handle, rpd->last->rec); { struct repeating_data_trns *new_trns; - rpd.h.proc = repeating_data_trns_proc; - rpd.h.free = repeating_data_trns_free; + rpd->h.proc = repeating_data_trns_proc; + rpd->h.free = repeating_data_trns_free; new_trns = xmalloc (sizeof *new_trns); memcpy (new_trns, &rpd, sizeof *new_trns); @@ -1553,6 +1560,11 @@ cmd_repeating_data (void) } return lex_end_of_command (); + + error: + destroy_dls_var_spec (rpd->first); + free (rpd->id_value); + return CMD_FAILURE; } /* Because of the way that DATA LIST is structured, it's not trivial @@ -1572,7 +1584,7 @@ find_variable_input_spec (struct variable *v, struct fmt_spec *spec) { struct dls_var_spec *iter; - for (iter = pgm->spec; iter; iter = iter->next) + for (iter = pgm->first; iter; iter = iter->next) if (iter->v == v) { *spec = iter->input; @@ -1624,8 +1636,9 @@ parse_num_or_var (struct rpd_num_or_var *value, const char *message) /* Parses data specifications for repeating data groups. Taken from parse_fixed(). Returns nonzero only if successful. */ static int -parse_repeating_data (void) +parse_repeating_data (struct dls_var_spec **first, struct dls_var_spec **last) { + struct fixed_parsing_state fx; int i; fx.recno = 0; @@ -1633,21 +1646,17 @@ parse_repeating_data (void) while (token != '.') { - fx.spec.rec = fx.recno; - - if (!parse_DATA_LIST_vars (&fx.name, &fx.nname, PV_NONE)) + if (!parse_DATA_LIST_vars (&fx.name, &fx.name_cnt, PV_NONE)) return 0; if (token == T_NUM) { - if (!fixed_parse_compatible ()) + if (!fixed_parse_compatible (&fx, first, last)) goto fail; } else if (token == '(') { - fx.level = 0; - fx.cname = 0; - if (!fixed_parse_fortran ()) + if (!fixed_parse_fortran (&fx, first, last)) goto fail; } else @@ -1657,7 +1666,7 @@ parse_repeating_data (void) goto fail; } - for (i = 0; i < fx.nname; i++) + for (i = 0; i < fx.name_cnt; i++) free (fx.name[i]); free (fx.name); } @@ -1669,8 +1678,8 @@ parse_repeating_data (void) return 1; -fail: - for (i = 0; i < fx.nname; i++) + fail: + for (i = 0; i < fx.name_cnt; i++) free (fx.name[i]); free (fx.name); return 0; @@ -1700,23 +1709,28 @@ realize_value (struct rpd_num_or_var *n, struct ccase *c) return 0; } -/* Parses one record of repeated data and outputs corresponding cases. - Repeating data is present in line LINE having length LEN. - Repeating data begins in column BEG and continues through column - END inclusive (1-based columns); occurrences are offset OFS columns - from each other. C is the case that will be filled in; T is the - REPEATING DATA transformation. The record ID will be verified if - COMPARE_ID is nonzero; if it is zero, then the record ID is - initialized to the ID present in the case (assuming that ID - location was specified by the user). Returns number of occurrences - parsed up to the specified maximum of MAX_OCCURS. */ +/* Parameter record passed to rpd_parse_record(). */ +struct rpd_parse_info + { + struct repeating_data_trns *trns; /* REPEATING DATA transformation. */ + const char *line; /* Line being parsed. */ + size_t len; /* Line length. */ + int beg, end; /* First and last column of first occurrence. */ + int ofs; /* Column offset between repeated occurrences. */ + struct ccase *c; /* Case to fill in. */ + int verify_id; /* Zero to initialize ID, nonzero to verify it. */ + int max_occurs; /* Max number of occurrences to parse. */ + }; + +/* Parses one record of repeated data and outputs corresponding + cases. Returns number of occurrences parsed up to the + maximum specified in INFO. */ static int -rpd_parse_record (int beg, int end, int ofs, struct ccase *c, - struct repeating_data_trns *t, - char *line, int len, int compare_id, int max_occurs) +rpd_parse_record (const struct rpd_parse_info *info) { + struct repeating_data_trns *t = info->trns; + int cur = info->beg; int occurrences; - int cur = beg; /* Handle record ID values. */ if (t->id_beg != 0) @@ -1727,8 +1741,8 @@ rpd_parse_record (int beg, int end, int ofs, struct ccase *c, { struct data_in di; - data_in_finite_line (&di, line, len, t->id_beg, t->id_end); - di.v = compare_id ? id_temp : t->id_value; + data_in_finite_line (&di, info->line, info->len, t->id_beg, t->id_end); + di.v = info->verify_id ? id_temp : t->id_value; di.flags = 0; di.f1 = t->id_beg; di.format = t->id_spec; @@ -1737,7 +1751,7 @@ rpd_parse_record (int beg, int end, int ofs, struct ccase *c, return 0; } - if (compare_id + if (info->verify_id && compare_values (id_temp, t->id_value, t->id_var->width) != 0) { char expected_str [MAX_FORMATTED_LEN + 1]; @@ -1763,35 +1777,35 @@ rpd_parse_record (int beg, int end, int ofs, struct ccase *c, { int warned = 0; - for (occurrences = 0; occurrences < max_occurs; ) + for (occurrences = 0; occurrences < info->max_occurs; ) { - if (cur + ofs > end + 1) + if (cur + info->ofs > info->end + 1) break; occurrences++; { - struct dls_var_spec *var_spec = t->spec; + struct dls_var_spec *var_spec = t->first; for (; var_spec; var_spec = var_spec->next) { int fc = var_spec->fc - 1 + cur; int lc = var_spec->lc - 1 + cur; - if (fc > len && !warned && var_spec->input.type != FMT_A) + if (fc > info->len && !warned && var_spec->input.type != FMT_A) { warned = 1; tmsg (SW, RPD_ERR, _("Variable %s starting in column %d extends " "beyond physical record length of %d."), - var_spec->v->name, fc, len); + var_spec->v->name, fc, info->len); } { struct data_in di; - data_in_finite_line (&di, line, len, fc, lc); - di.v = &c->data[var_spec->fv]; + data_in_finite_line (&di, info->line, info->len, fc, lc); + di.v = &info->c->data[var_spec->fv]; di.flags = 0; di.f1 = fc + 1; di.format = var_spec->input; @@ -1802,7 +1816,7 @@ rpd_parse_record (int beg, int end, int ofs, struct ccase *c, } } - cur += ofs; + cur += info->ofs; if (!t->write_case (t->wc_data)) return 0; @@ -1818,132 +1832,147 @@ rpd_parse_record (int beg, int end, int ofs, struct ccase *c, int repeating_data_trns_proc (struct trns_header *trns, struct ccase *c) { - dfm_push (dlsp->handle); - - { - struct repeating_data_trns *t = (struct repeating_data_trns *) trns; + struct repeating_data_trns *t = (struct repeating_data_trns *) trns; - char *line; /* Current record. */ - int len; /* Length of current record. */ + char *line; /* Current record. */ + int len; /* Length of current record. */ - int starts_beg; /* Starting column. */ - int starts_end; /* Ending column. */ - int occurs; /* Number of repetitions. */ - int length; /* Length of each occurrence. */ - int cont_beg; /* Starting column for continuation lines. */ - int cont_end; /* Ending column for continuation lines. */ + int starts_beg; /* Starting column. */ + int starts_end; /* Ending column. */ + int occurs; /* Number of repetitions. */ + int length; /* Length of each occurrence. */ + int cont_beg; /* Starting column for continuation lines. */ + int cont_end; /* Ending column for continuation lines. */ - int occurs_left; /* Number of occurrences remaining. */ + int occurs_left; /* Number of occurrences remaining. */ - int code; /* Return value from rpd_parse_record(). */ + int code; /* Return value from rpd_parse_record(). */ - int skip_first_record = 0; + int skip_first_record = 0; - /* Read the current record. */ - dfm_bkwd_record (dlsp->handle, 1); - line = dfm_get_record (dlsp->handle, &len); - if (line == NULL) - return -2; - dfm_fwd_record (dlsp->handle); - - /* Calculate occurs, length. */ - occurs_left = occurs = realize_value (&t->occurs, c); - if (occurs <= 0) - { - tmsg (SE, RPD_ERR, _("Invalid value %d for OCCURS."), occurs); - return -3; - } - starts_beg = realize_value (&t->starts_beg, c); - if (starts_beg <= 0) - { - tmsg (SE, RPD_ERR, _("Beginning column for STARTS (%d) must be " - "at least 1."), - starts_beg); - return -3; - } - starts_end = realize_value (&t->starts_end, c); - if (starts_end < starts_beg) - { - tmsg (SE, RPD_ERR, _("Ending column for STARTS (%d) is less than " - "beginning column (%d)."), - starts_end, starts_beg); - skip_first_record = 1; - } - length = realize_value (&t->length, c); - if (length < 0) - { - tmsg (SE, RPD_ERR, _("Invalid value %d for LENGTH."), length); - length = 1; - occurs = occurs_left = 1; - } - cont_beg = realize_value (&t->cont_beg, c); - if (cont_beg < 0) - { - tmsg (SE, RPD_ERR, _("Beginning column for CONTINUED (%d) must be " - "at least 1."), - cont_beg); - return -2; - } - cont_end = realize_value (&t->cont_end, c); - if (cont_end < cont_beg) - { - tmsg (SE, RPD_ERR, _("Ending column for CONTINUED (%d) is less than " - "beginning column (%d)."), - cont_end, cont_beg); - return -2; - } + dfm_push (t->handle); + + /* Read the current record. */ + dfm_bkwd_record (t->handle, 1); + line = dfm_get_record (t->handle, &len); + if (line == NULL) + return -2; + dfm_fwd_record (t->handle); - /* Parse the first record. */ - if (!skip_first_record) - { - code = rpd_parse_record (starts_beg, starts_end, length, c, t, line, - len, 0, occurs_left); - if (!code) - return -2; - } - else if (cont_beg == 0) + /* Calculate occurs, length. */ + occurs_left = occurs = realize_value (&t->occurs, c); + if (occurs <= 0) + { + tmsg (SE, RPD_ERR, _("Invalid value %d for OCCURS."), occurs); + return -3; + } + starts_beg = realize_value (&t->starts_beg, c); + if (starts_beg <= 0) + { + tmsg (SE, RPD_ERR, _("Beginning column for STARTS (%d) must be " + "at least 1."), + starts_beg); return -3; + } + starts_end = realize_value (&t->starts_end, c); + if (starts_end < starts_beg) + { + tmsg (SE, RPD_ERR, _("Ending column for STARTS (%d) is less than " + "beginning column (%d)."), + starts_end, starts_beg); + skip_first_record = 1; + } + length = realize_value (&t->length, c); + if (length < 0) + { + tmsg (SE, RPD_ERR, _("Invalid value %d for LENGTH."), length); + length = 1; + occurs = occurs_left = 1; + } + cont_beg = realize_value (&t->cont_beg, c); + if (cont_beg < 0) + { + tmsg (SE, RPD_ERR, _("Beginning column for CONTINUED (%d) must be " + "at least 1."), + cont_beg); + return -2; + } + cont_end = realize_value (&t->cont_end, c); + if (cont_end < cont_beg) + { + tmsg (SE, RPD_ERR, _("Ending column for CONTINUED (%d) is less than " + "beginning column (%d)."), + cont_end, cont_beg); + return -2; + } - /* Make sure, if some occurrences are left, that we have - continuation records. */ - occurs_left -= code; - if (occurs_left != 0 && cont_beg == 0) - { - tmsg (SE, RPD_ERR, - _("Number of repetitions specified on OCCURS (%d) " - "exceed number of repetitions available in " - "space on STARTS (%d), and CONTINUED not specified."), - occurs, code); - return -2; - } + /* Parse the first record. */ + if (!skip_first_record) + { + struct rpd_parse_info info; + info.trns = t; + info.line = line; + info.len = len; + info.beg = starts_beg; + info.end = starts_end; + info.c = c; + info.verify_id = 0; + info.max_occurs = occurs_left; + code = rpd_parse_record (&info);; + if (!code) + return -2; + } + else if (cont_beg == 0) + return -3; - /* Go on to additional records. */ - while (occurs_left != 0) - { - assert (occurs_left >= 0); + /* Make sure, if some occurrences are left, that we have + continuation records. */ + occurs_left -= code; + if (occurs_left != 0 && cont_beg == 0) + { + tmsg (SE, RPD_ERR, + _("Number of repetitions specified on OCCURS (%d) " + "exceed number of repetitions available in " + "space on STARTS (%d), and CONTINUED not specified."), + occurs, code); + return -2; + } - /* Read in another record. */ - line = dfm_get_record (dlsp->handle, &len); - if (line == NULL) - { - tmsg (SE, RPD_ERR, - _("Unexpected end of file with %d repetitions " - "remaining out of %d."), - occurs_left, occurs); - return -2; - } - dfm_fwd_record (dlsp->handle); + /* Go on to additional records. */ + while (occurs_left != 0) + { + struct rpd_parse_info info; - /* Parse this record. */ - code = rpd_parse_record (cont_beg, cont_end, length, c, t, line, - len, 1, occurs_left); - if (!code) - return -2; - occurs_left -= code; - } - } + assert (occurs_left >= 0); + + /* Read in another record. */ + line = dfm_get_record (t->handle, &len); + if (line == NULL) + { + tmsg (SE, RPD_ERR, + _("Unexpected end of file with %d repetitions " + "remaining out of %d."), + occurs_left, occurs); + return -2; + } + dfm_fwd_record (t->handle); + + /* Parse this record. */ + info.trns = t; + info.line = line; + info.len = len; + info.beg = cont_beg; + info.end = cont_end; + info.c = c; + info.verify_id = 1; + info.max_occurs = occurs_left; + code = rpd_parse_record (&info);; + if (!code) + return -2; + occurs_left -= code; + } - dfm_pop (dlsp->handle); + dfm_pop (t->handle); /* FIXME: This is a kluge until we've implemented multiplexing of transformations. */ @@ -1955,7 +1984,7 @@ repeating_data_trns_free (struct trns_header *rpd_) { struct repeating_data_trns *rpd = (struct repeating_data_trns *) rpd_; - destroy_dls_var_spec (rpd->spec); + destroy_dls_var_spec (rpd->first); fh_close_handle (rpd->handle); free (rpd->id_value); } diff --git a/src/dfm.c b/src/dfm.c index 9905b6b3..1ccbb71e 100644 --- a/src/dfm.c +++ b/src/dfm.c @@ -650,9 +650,9 @@ cmd_begin_data (void) /* FIXME: figure out the *exact* conditions, not these really lenient conditions. */ if (vfm_source == NULL - || vfm_source == &vfm_memory_stream - || vfm_source == &vfm_disk_stream - || vfm_source == &sort_stream) + || case_source_is_class (vfm_source, &memory_source_class) + || case_source_is_class (vfm_source, &disk_source_class) + || case_source_is_class (vfm_source, &sort_source_class)) { msg (SE, _("This command is not valid here since the current " "input program does not access the inline file.")); diff --git a/src/file-handle.h b/src/file-handle.h index b086d787..c7b45505 100644 --- a/src/file-handle.h +++ b/src/file-handle.h @@ -92,7 +92,7 @@ struct file_handle *fh_parse_file_handle (void); void fh_close_handle (struct file_handle *handle); /* Handle info. */ -const char *fh_handle_name (struct file_handle *handle); +const char *fh_handle_name (const struct file_handle *handle); char *fh_handle_filename (struct file_handle *handle); size_t fh_record_width (struct file_handle *handle); diff --git a/src/file-handle.q b/src/file-handle.q index c4878bbb..77291a45 100644 --- a/src/file-handle.q +++ b/src/file-handle.q @@ -241,7 +241,7 @@ fh_get_handle_by_name (const char name[9]) Useful for printing error messages about use of file handles. */ const char * -fh_handle_name (struct file_handle *h) +fh_handle_name (const struct file_handle *h) { static char *buf = NULL; diff --git a/src/file-type.c b/src/file-type.c index 1e647ecb..fee98518 100644 --- a/src/file-type.c +++ b/src/file-type.c @@ -88,9 +88,6 @@ struct file_type_pgm struct record_type *recs_tail; /* Last in list of record types. */ }; -/* Current FILE TYPE input program. */ -static struct file_type_pgm fty; - static int parse_col_spec (struct col_spec *, const char *); static void create_col_var (struct col_spec *c); @@ -98,32 +95,36 @@ static void create_col_var (struct col_spec *c); int cmd_file_type (void) { + static struct file_type_pgm *fty; + /* Initialize. */ discard_variables (); - fty.handle = inline_file; - fty.record.name[0] = 0; - fty.case_sbc.name[0] = 0; - fty.wild = fty.duplicate = fty.missing = fty.ordered = 0; - fty.had_rec_type = 0; - fty.recs_head = fty.recs_tail = NULL; + + fty = xmalloc (sizeof *fty); + fty->handle = inline_file; + fty->record.name[0] = 0; + fty->case_sbc.name[0] = 0; + fty->wild = fty->duplicate = fty->missing = fty->ordered = 0; + fty->had_rec_type = 0; + fty->recs_head = fty->recs_tail = NULL; lex_match_id ("TYPE"); if (lex_match_id ("MIXED")) - fty.type = FTY_MIXED; + fty->type = FTY_MIXED; else if (lex_match_id ("GROUPED")) { - fty.type = FTY_GROUPED; - fty.wild = 1; - fty.duplicate = 1; - fty.missing = 1; - fty.ordered = 1; + fty->type = FTY_GROUPED; + fty->wild = 1; + fty->duplicate = 1; + fty->missing = 1; + fty->ordered = 1; } else if (lex_match_id ("NESTED")) - fty.type = FTY_NESTED; + fty->type = FTY_NESTED; else { msg (SE, _("MIXED, GROUPED, or NESTED expected.")); - return CMD_FAILURE; + goto error; } while (token != '.') @@ -131,151 +132,155 @@ cmd_file_type (void) if (lex_match_id ("FILE")) { lex_match ('='); - fty.handle = fh_parse_file_handle (); - if (!fty.handle) - return CMD_FAILURE; + fty->handle = fh_parse_file_handle (); + if (!fty->handle) + goto error; } else if (lex_match_id ("RECORD")) { lex_match ('='); - if (!parse_col_spec (&fty.record, "####RECD")) - return CMD_FAILURE; + if (!parse_col_spec (&fty->record, "####RECD")) + goto error; } else if (lex_match_id ("CASE")) { - if (fty.type == FTY_MIXED) + if (fty->type == FTY_MIXED) { msg (SE, _("The CASE subcommand is not valid on FILE TYPE " "MIXED.")); - return CMD_FAILURE; + goto error; } lex_match ('='); - if (!parse_col_spec (&fty.case_sbc, "####CASE")) - return CMD_FAILURE; + if (!parse_col_spec (&fty->case_sbc, "####CASE")) + goto error; } else if (lex_match_id ("WILD")) { lex_match ('='); if (lex_match_id ("WARN")) - fty.wild = 1; + fty->wild = 1; else if (lex_match_id ("NOWARN")) - fty.wild = 0; + fty->wild = 0; else { msg (SE, _("WARN or NOWARN expected after WILD.")); - return CMD_FAILURE; + goto error; } } else if (lex_match_id ("DUPLICATE")) { - if (fty.type == FTY_MIXED) + if (fty->type == FTY_MIXED) { msg (SE, _("The DUPLICATE subcommand is not valid on " "FILE TYPE MIXED.")); - return CMD_FAILURE; + goto error; } lex_match ('='); if (lex_match_id ("WARN")) - fty.duplicate = 1; + fty->duplicate = 1; else if (lex_match_id ("NOWARN")) - fty.duplicate = 0; + fty->duplicate = 0; else if (lex_match_id ("CASE")) { - if (fty.type != FTY_NESTED) + if (fty->type != FTY_NESTED) { msg (SE, _("DUPLICATE=CASE is only valid on " "FILE TYPE NESTED.")); - return CMD_FAILURE; + goto error; } - fty.duplicate = 2; + fty->duplicate = 2; } else { msg (SE, _("WARN%s expected after DUPLICATE."), - (fty.type == FTY_NESTED ? _(", NOWARN, or CASE") + (fty->type == FTY_NESTED ? _(", NOWARN, or CASE") : _(" or NOWARN"))); - return CMD_FAILURE; + goto error; } } else if (lex_match_id ("MISSING")) { - if (fty.type == FTY_MIXED) + if (fty->type == FTY_MIXED) { msg (SE, _("The MISSING subcommand is not valid on " "FILE TYPE MIXED.")); - return CMD_FAILURE; + goto error; } lex_match ('='); if (lex_match_id ("NOWARN")) - fty.missing = 0; + fty->missing = 0; else if (lex_match_id ("WARN")) - fty.missing = 1; + fty->missing = 1; else { msg (SE, _("WARN or NOWARN after MISSING.")); - return CMD_FAILURE; + goto error; } } else if (lex_match_id ("ORDERED")) { - if (fty.type != FTY_GROUPED) + if (fty->type != FTY_GROUPED) { msg (SE, _("ORDERED is only valid on FILE TYPE GROUPED.")); - return CMD_FAILURE; + goto error; } lex_match ('='); if (lex_match_id ("YES")) - fty.ordered = 1; + fty->ordered = 1; else if (lex_match_id ("NO")) - fty.ordered = 0; + fty->ordered = 0; else { msg (SE, _("YES or NO expected after ORDERED.")); - return CMD_FAILURE; + goto error; } } else { lex_error (_("while expecting a valid subcommand")); - return CMD_FAILURE; + goto error; } } - if (fty.record.name[0] == 0) + if (fty->record.name[0] == 0) { msg (SE, _("The required RECORD subcommand was not present.")); - return CMD_FAILURE; + goto error; } - if (fty.type == FTY_GROUPED) + if (fty->type == FTY_GROUPED) { - if (fty.case_sbc.name[0] == 0) + if (fty->case_sbc.name[0] == 0) { msg (SE, _("The required CASE subcommand was not present.")); - return CMD_FAILURE; + goto error; } - if (!strcmp (fty.case_sbc.name, fty.record.name)) + if (!strcmp (fty->case_sbc.name, fty->record.name)) { msg (SE, _("CASE and RECORD must specify different variable " "names.")); - return CMD_FAILURE; + goto error; } } - default_handle = fty.handle; + default_handle = fty->handle; - vfm_source = &file_type_source; - create_col_var (&fty.record); - if (fty.case_sbc.name[0]) - create_col_var (&fty.case_sbc); + vfm_source = create_case_source (&file_type_source_class, fty); + create_col_var (&fty->record); + if (fty->case_sbc.name[0]) + create_col_var (&fty->case_sbc); return CMD_SUCCESS; + + error: + free (fty); + return CMD_FAILURE; } /* Creates a variable with attributes specified by struct col_spec C, and @@ -298,6 +303,7 @@ parse_col_spec (struct col_spec *c, const char *def_name) { struct fmt_spec spec; + /* Name. */ if (token == T_ID) { strcpy (c->name, tokid); @@ -306,6 +312,7 @@ parse_col_spec (struct col_spec *c, const char *def_name) else strcpy (c->name, def_name); + /* First column. */ if (!lex_force_int ()) return 0; c->fc = lex_integer (); @@ -316,6 +323,7 @@ parse_col_spec (struct col_spec *c, const char *def_name) } lex_get (); + /* Last column. */ lex_negative_to_dash (); if (lex_match ('-')) { @@ -335,6 +343,7 @@ parse_col_spec (struct col_spec *c, const char *def_name) else c->nc = 1; + /* Format specifier. */ if (lex_match ('(')) { const char *cp; @@ -363,69 +372,53 @@ parse_col_spec (struct col_spec *c, const char *def_name) /* RECORD TYPE. */ -/* Structure being filled in by internal_cmd_record_type. */ -static struct record_type rct; - -static int internal_cmd_record_type (void); - /* Parse the RECORD TYPE command. */ int cmd_record_type (void) { - int result = internal_cmd_record_type (); - - if (result == CMD_FAILURE) - { - int i; - - if (formats[fty.record.fmt].cat & FCAT_STRING) - for (i = 0; i < rct.nv; i++) - free (rct.v[i].c); - free (rct.v); - } - - return result; -} - -static int -internal_cmd_record_type (void) -{ - /* Initialize the record_type structure. */ - rct.next = NULL; - rct.flags = 0; - if (fty.duplicate) - rct.flags |= RCT_DUPLICATE; - if (fty.missing) - rct.flags |= RCT_MISSING; - rct.v = NULL; - rct.nv = 0; - rct.ft = n_trns; - if (fty.case_sbc.name[0]) - rct.case_sbc = fty.case_sbc; + struct file_type_pgm *fty; + struct record_type *rct; /* Make sure we're inside a FILE TYPE structure. */ - if (pgm_state != STATE_INPUT || vfm_source != &file_type_source) + if (pgm_state != STATE_INPUT + || !case_source_is_class (vfm_source, &file_type_source_class)) { msg (SE, _("This command may only appear within a " "FILE TYPE/END FILE TYPE structure.")); return CMD_FAILURE; } - if (fty.recs_tail && (fty.recs_tail->flags & RCT_OTHER)) + fty = vfm_source->aux; + + /* Initialize the record_type structure. */ + rct = xmalloc (sizeof *rct); + rct->next = NULL; + rct->flags = 0; + if (fty->duplicate) + rct->flags |= RCT_DUPLICATE; + if (fty->missing) + rct->flags |= RCT_MISSING; + rct->v = NULL; + rct->nv = 0; + rct->ft = n_trns; + if (fty->case_sbc.name[0]) + rct->case_sbc = fty->case_sbc; + + if (fty->recs_tail && (fty->recs_tail->flags & RCT_OTHER)) { msg (SE, _("OTHER may appear only on the last RECORD TYPE command.")); - return CMD_FAILURE; + goto error; } - if (fty.recs_tail) + if (fty->recs_tail) { - fty.recs_tail->lt = n_trns - 1; - if (!(fty.recs_tail->flags & RCT_SKIP) - && fty.recs_tail->ft == fty.recs_tail->lt) + fty->recs_tail->lt = n_trns - 1; + if (!(fty->recs_tail->flags & RCT_SKIP) + && fty->recs_tail->ft == fty->recs_tail->lt) { msg (SE, _("No input commands (DATA LIST, REPEATING DATA) " "for above RECORD TYPE.")); - return CMD_FAILURE; + goto error; } } @@ -434,34 +427,34 @@ internal_cmd_record_type (void) /* Parse record type values. */ if (lex_match_id ("OTHER")) - rct.flags |= RCT_OTHER; + rct->flags |= RCT_OTHER; else { int mv = 0; while (token == T_NUM || token == T_STRING) { - if (rct.nv >= mv) + if (rct->nv >= mv) { mv += 16; - rct.v = xrealloc (rct.v, mv * sizeof *rct.v); + rct->v = xrealloc (rct->v, mv * sizeof *rct->v); } - if (formats[fty.record.fmt].cat & FCAT_STRING) + if (formats[fty->record.fmt].cat & FCAT_STRING) { if (!lex_force_string ()) - return CMD_FAILURE; - rct.v[rct.nv].c = xmalloc (fty.record.nc + 1); - st_bare_pad_copy (rct.v[rct.nv].c, ds_value (&tokstr), - fty.record.nc + 1); + goto error; + rct->v[rct->nv].c = xmalloc (fty->record.nc + 1); + st_bare_pad_copy (rct->v[rct->nv].c, ds_value (&tokstr), + fty->record.nc + 1); } else { if (!lex_force_num ()) - return CMD_FAILURE; - rct.v[rct.nv].f = tokval; + goto error; + rct->v[rct->nv].f = tokval; } - rct.nv++; + rct->nv++; lex_get (); lex_match (','); @@ -472,90 +465,103 @@ internal_cmd_record_type (void) while (token != '.') { if (lex_match_id ("SKIP")) - rct.flags |= RCT_SKIP; + rct->flags |= RCT_SKIP; else if (lex_match_id ("CASE")) { - if (fty.type == FTY_MIXED) + if (fty->type == FTY_MIXED) { msg (SE, _("The CASE subcommand is not allowed on " "the RECORD TYPE command for FILE TYPE MIXED.")); - return CMD_FAILURE; + goto error; } lex_match ('='); - if (!parse_col_spec (&rct.case_sbc, "")) - return CMD_FAILURE; - if (rct.case_sbc.name[0]) + if (!parse_col_spec (&rct->case_sbc, "")) + goto error; + if (rct->case_sbc.name[0]) { msg (SE, _("No variable name may be specified for the " "CASE subcommand on RECORD TYPE.")); - return CMD_FAILURE; + goto error; } - if ((formats[rct.case_sbc.fmt].cat ^ formats[fty.case_sbc.fmt].cat) + if ((formats[rct->case_sbc.fmt].cat ^ formats[fty->case_sbc.fmt].cat) & FCAT_STRING) { msg (SE, _("The CASE column specification on RECORD TYPE " "must give a format specifier that is the " "same type as that of the CASE column " "specification given on FILE TYPE.")); - return CMD_FAILURE; + goto error; } } else if (lex_match_id ("DUPLICATE")) { lex_match ('='); if (lex_match_id ("WARN")) - rct.flags |= RCT_DUPLICATE; + rct->flags |= RCT_DUPLICATE; else if (lex_match_id ("NOWARN")) - rct.flags &= ~RCT_DUPLICATE; + rct->flags &= ~RCT_DUPLICATE; else { msg (SE, _("WARN or NOWARN expected on DUPLICATE " "subcommand.")); - return CMD_FAILURE; + goto error; } } else if (lex_match_id ("MISSING")) { lex_match ('='); if (lex_match_id ("WARN")) - rct.flags |= RCT_MISSING; + rct->flags |= RCT_MISSING; else if (lex_match_id ("NOWARN")) - rct.flags &= ~RCT_MISSING; + rct->flags &= ~RCT_MISSING; else { msg (SE, _("WARN or NOWARN expected on MISSING subcommand.")); - return CMD_FAILURE; + goto error; } } else if (lex_match_id ("SPREAD")) { lex_match ('='); if (lex_match_id ("YES")) - rct.flags |= RCT_SPREAD; + rct->flags |= RCT_SPREAD; else if (lex_match_id ("NO")) - rct.flags &= ~RCT_SPREAD; + rct->flags &= ~RCT_SPREAD; else { msg (SE, _("YES or NO expected on SPREAD subcommand.")); - return CMD_FAILURE; + goto error; } } else { lex_error (_("while expecting a valid subcommand")); - return CMD_FAILURE; + goto error; } } - if (fty.recs_head) - fty.recs_tail = fty.recs_tail->next = xmalloc (sizeof *fty.recs_tail); + if (fty->recs_head) + fty->recs_tail = fty->recs_tail->next = xmalloc (sizeof *fty->recs_tail); else - fty.recs_head = fty.recs_tail = xmalloc (sizeof *fty.recs_tail); - memcpy (fty.recs_tail, &rct, sizeof *fty.recs_tail); + fty->recs_head = fty->recs_tail = xmalloc (sizeof *fty->recs_tail); + memcpy (fty->recs_tail, &rct, sizeof *fty->recs_tail); return CMD_SUCCESS; + + error: + if (formats[fty->record.fmt].cat & FCAT_STRING) + { + int i; + + for (i = 0; i < rct->nv; i++) + free (rct->v[i].c); + } + free (rct->v); + free (rct); + + return CMD_FAILURE; } /* END FILE TYPE. */ @@ -563,20 +569,24 @@ internal_cmd_record_type (void) int cmd_end_file_type (void) { - if (pgm_state != STATE_INPUT || vfm_source != &file_type_source) + struct file_type_pgm *fty; + + if (pgm_state != STATE_INPUT + || case_source_is_class (vfm_source, &file_type_source_class)) { msg (SE, _("This command may only appear within a " "FILE TYPE/END FILE TYPE structure.")); return CMD_FAILURE; } + fty = vfm_source->aux; lex_match_id ("TYPE"); - if (fty.recs_tail) + if (fty->recs_tail) { - fty.recs_tail->lt = n_trns - 1; - if (!(fty.recs_tail->flags & RCT_SKIP) - && fty.recs_tail->ft == fty.recs_tail->lt) + fty->recs_tail->lt = n_trns - 1; + if (!(fty->recs_tail->flags & RCT_SKIP) + && fty->recs_tail->ft == fty->recs_tail->lt) { msg (SE, _("No input commands (DATA LIST, REPEATING DATA) " "on above RECORD TYPE.")); @@ -609,64 +619,66 @@ cmd_end_file_type (void) /* Reads any number of cases into temp_case and calls write_case() for each one. Compare data-list.c:read_from_data_list. */ static void -file_type_source_read (write_case_func *write_case UNUSED, +file_type_source_read (struct case_source *source, + write_case_func *write_case UNUSED, write_case_data wc_data UNUSED) { + struct file_type_pgm *fty = source->aux; char *line; int len; struct fmt_spec format; - dfm_push (fty.handle); + dfm_push (fty->handle); - format.type = fty.record.fmt; - format.w = fty.record.nc; + format.type = fty->record.fmt; + format.w = fty->record.nc; format.d = 0; - while (NULL != (line = dfm_get_record (fty.handle, &len))) + while (NULL != (line = dfm_get_record (fty->handle, &len))) { struct record_type *iter; union value v; int i; - if (formats[fty.record.fmt].cat & FCAT_STRING) + if (formats[fty->record.fmt].cat & FCAT_STRING) { struct data_in di; - v.c = temp_case->data[fty.record.v->fv].s; + v.c = temp_case->data[fty->record.v->fv].s; data_in_finite_line (&di, line, len, - fty.record.fc, fty.record.fc + fty.record.nc); + fty->record.fc, fty->record.fc + fty->record.nc); di.v = (union value *) v.c; di.flags = 0; - di.f1 = fty.record.fc; + di.f1 = fty->record.fc; di.format = format; data_in (&di); - for (iter = fty.recs_head; iter; iter = iter->next) + for (iter = fty->recs_head; iter; iter = iter->next) { if (iter->flags & RCT_OTHER) goto found; for (i = 0; i < iter->nv; i++) - if (!memcmp (iter->v[i].c, v.c, fty.record.nc)) + if (!memcmp (iter->v[i].c, v.c, fty->record.nc)) goto found; } - if (fty.wild) - msg (SW, _("Unknown record type \"%.*s\"."), fty.record.nc, v.c); + if (fty->wild) + msg (SW, _("Unknown record type \"%.*s\"."), fty->record.nc, v.c); } else { struct data_in di; data_in_finite_line (&di, line, len, - fty.record.fc, fty.record.fc + fty.record.nc); + fty->record.fc, fty->record.fc + fty->record.nc); di.v = &v; di.flags = 0; - di.f1 = fty.record.fc; + di.f1 = fty->record.fc; di.format = format; data_in (&di); - memcpy (&temp_case->data[fty.record.v->fv].f, &v.f, sizeof v.f); - for (iter = fty.recs_head; iter; iter = iter->next) + memcpy (&temp_case->data[fty->record.v->fv].f, &v.f, sizeof v.f); + for (iter = fty->recs_head; iter; iter = iter->next) { if (iter->flags & RCT_OTHER) goto found; @@ -674,19 +686,19 @@ file_type_source_read (write_case_func *write_case UNUSED, if (iter->v[i].f == v.f) goto found; } - if (fty.wild) + if (fty->wild) msg (SW, _("Unknown record type %g."), v.f); } - dfm_fwd_record (fty.handle); + dfm_fwd_record (fty->handle); continue; found: /* Arrive here if there is a matching record_type, which is in iter. */ - dfm_fwd_record (fty.handle); + dfm_fwd_record (fty->handle); } -/* switch(fty.type) +/* switch(fty->type) { case FTY_MIXED: read_from_file_type_mixed(); break; case FTY_GROUPED: read_from_file_type_grouped(); break; @@ -694,29 +706,26 @@ file_type_source_read (write_case_func *write_case UNUSED, default: assert(0); } */ - dfm_pop (fty.handle); + dfm_pop (fty->handle); } static void -file_type_source_destroy_source (void) +file_type_source_destroy (struct case_source *source) { + struct file_type_pgm *fty = source->aux; struct record_type *iter, *next; cancel_transformations (); - for (iter = fty.recs_head; iter; iter = next) + for (iter = fty->recs_head; iter; iter = next) { next = iter->next; free (iter); } } -struct case_stream file_type_source = +const struct case_source_class file_type_source_class = { - NULL, - file_type_source_read, - NULL, - NULL, - file_type_source_destroy_source, - NULL, "FILE TYPE", + file_type_source_read, + file_type_source_destroy, }; diff --git a/src/flip.c b/src/flip.c index f599fd78..4ff7045e 100644 --- a/src/flip.c +++ b/src/flip.c @@ -17,6 +17,8 @@ Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. */ +/* FIXME: does this work with long string variables? */ + #include #include #include @@ -29,17 +31,11 @@ #include "error.h" #include "lexer.h" #include "misc.h" +#include "settings.h" #include "str.h" #include "var.h" #include "vfm.h" -/* Variables to transpose. */ -static struct variable **var; -static int nvar; - -/* Variable containing new variable names. */ -static struct variable *newnames; - /* List of variable names. */ struct varname { @@ -47,71 +43,104 @@ struct varname char name[9]; }; -/* New variable names. */ -static struct varname *new_names_head, *new_names_tail; -static int case_count; +/* Represents a FLIP input program. */ +struct flip_pgm + { + struct variable **var; /* Variables to transpose. */ + int var_cnt; /* Number of variables. */ + struct variable *newnames; /* Variable containing new variable names. */ + struct varname *new_names_head, *new_names_tail; + /* New variable names. */ + int case_count; /* Number of cases. */ -static int build_dictionary (void); + }; + +static void destroy_flip_pgm (struct flip_pgm *flip); +static struct case_sink *flip_sink_create (struct flip_pgm *); +static const struct case_source_class flip_source_class; +static int build_dictionary (struct flip_pgm *flip); /* Parses and executes FLIP. */ int cmd_flip (void) { + struct flip_pgm *flip; + + flip = xmalloc (sizeof *flip); + flip->var = NULL; + flip->var_cnt = 0; + flip->newnames = NULL; + flip->new_names_head = flip->new_names_tail = NULL; + flip->case_count = 0; + lex_match_id ("FLIP"); lex_match ('/'); if (lex_match_id ("VARIABLES")) { lex_match ('='); - if (!parse_variables (default_dict, &var, &nvar, PV_NO_DUPLICATE)) + if (!parse_variables (default_dict, &flip->var, &flip->var_cnt, PV_NO_DUPLICATE)) return CMD_FAILURE; lex_match ('/'); } else - dict_get_vars (default_dict, &var, &nvar, 1u << DC_SYSTEM); + dict_get_vars (default_dict, &flip->var, &flip->var_cnt, 1u << DC_SYSTEM); lex_match ('/'); if (lex_match_id ("NEWNAMES")) { lex_match ('='); - newnames = parse_variable (); - if (!newnames) - { - free (var); - return CMD_FAILURE; - } + flip->newnames = parse_variable (); + if (!flip->newnames) + goto error; } else - newnames = dict_lookup_var (default_dict, "CASE_LBL"); + flip->newnames = dict_lookup_var (default_dict, "CASE_LBL"); - if (newnames) + if (flip->newnames) { int i; - for (i = 0; i < nvar; i++) - if (var[i] == newnames) + for (i = 0; i < flip->var_cnt; i++) + if (flip->var[i] == flip->newnames) { - memmove (&var[i], &var[i + 1], sizeof *var * (nvar - i - 1)); - nvar--; + memmove (&flip->var[i], &flip->var[i + 1], sizeof *flip->var * (flip->var_cnt - i - 1)); + flip->var_cnt--; break; } } - case_count = 0; + flip->case_count = 0; temp_trns = temporary = 0; - vfm_sink = &flip_stream; - new_names_tail = NULL; + vfm_sink = flip_sink_create (flip); + flip->new_names_tail = NULL; procedure (NULL, NULL, NULL, NULL); dict_clear (default_dict); - if (!build_dictionary ()) + if (!build_dictionary (flip)) { discard_variables (); - free (var); - return CMD_FAILURE; + goto error; } - free (var); return lex_end_of_command (); + + error: + destroy_flip_pgm (flip); + return CMD_FAILURE; +} + +static void +destroy_flip_pgm (struct flip_pgm *flip) +{ + struct varname *iter, *next; + + free (flip->var); + for (iter = flip->new_names_head; iter != NULL; iter = next) + { + next = iter->next; + free (iter); + } + free (flip); } /* Make a new variable with base name NAME, which is bowdlerized and @@ -165,21 +194,21 @@ make_new_var (char name[]) /* Make a new dictionary for all the new variable names. */ static int -build_dictionary (void) +build_dictionary (struct flip_pgm *flip) { dict_create_var_assert (default_dict, "CASE_LBL", 8); - if (!new_names_tail) + if (flip->new_names_head == NULL) { int i; - if (case_count > 99999) + if (flip->case_count > 99999) { msg (SE, _("Cannot create more than 99999 variable names.")); return 0; } - for (i = 0; i < case_count; i++) + for (i = 0; i < flip->case_count; i++) { struct variable *v; char s[9]; @@ -190,127 +219,91 @@ build_dictionary (void) } else { - struct varname *v, *n; + struct varname *v; - new_names_tail->next = NULL; - for (v = new_names_head; v; v = n) - { - n = v->next; - if (!make_new_var (v->name)) - { - for (; v; v = n) - { - n = v->next; - free (v); - } - return 0; - } - free (v); - } + for (v = flip->new_names_head; v; v = v->next) + if (!make_new_var (v->name)) + return 0; } return 1; } +/* Cases during transposition. */ +struct flip_sink_info + { + struct flip_pgm *flip; /* FLIP program. */ + int internal; /* Internal or external flip. */ + char *old_names; /* Old variable names. */ + unsigned long case_cnt; /* Number of cases. */ + FILE *file; /* Temporary file. */ + }; -/* Each case to be transposed. */ -struct flip_case +/* Source: Cases after transposition. */ +struct flip_source_info { - struct flip_case *next; - double v[1]; + struct flip_pgm *flip; /* FLIP program. */ + char *old_names; /* Old variable names. */ + unsigned long case_cnt; /* Number of cases. */ + FILE *file; /* Temporary file. */ }; -/* Sink: Cases during transposition. */ -static int internal; /* Internal vs. external flipping. */ -static char *sink_old_names; /* Old variable names. */ -static unsigned long sink_cases; /* Number of cases. */ -static struct flip_case *head, *tail; /* internal == 1: Cases. */ -static FILE *sink_file; /* internal == 0: Temporary file. */ +static const struct case_sink_class flip_sink_class; -/* Source: Cases after transposition. */ -static struct flip_case *src; /* Internal transposition records. */ -static char *src_old_names; /* Old variable names. */ -static unsigned long src_cases; /* Number of cases. */ -static FILE *src_file; /* src == NULL: Temporary file. */ - -/* Initialize the FLIP stream. */ -static void -flip_stream_init (void) +static FILE *flip_file (struct flip_sink_info *info); + +/* Creates a flip sink based on FLIP, of which it takes + ownership. */ +static struct case_sink * +flip_sink_create (struct flip_pgm *flip) { - internal = 1; - sink_cases = 0; - tail = NULL; + struct flip_sink_info *info = xmalloc (sizeof *info); + + info->flip = flip; + info->case_cnt = 0; { - size_t n = nvar; + size_t n = flip->var_cnt; char *p; int i; - for (i = 0; i < nvar; i++) - n += strlen (var[i]->name); - p = sink_old_names = xmalloc (n); - for (i = 0; i < nvar; i++) - p = stpcpy (p, var[i]->name) + 1; + for (i = 0; i < flip->var_cnt; i++) + n += strlen (flip->var[i]->name); + p = info->old_names = xmalloc (n); + for (i = 0; i < flip->var_cnt; i++) + p = stpcpy (p, flip->var[i]->name) + 1; } + + return create_case_sink (&flip_sink_class, info); } -/* Reads the FLIP stream and passes it to write_case(). */ +/* Open the FLIP sink. */ static void -flip_stream_read (write_case_func *write_case, write_case_data wc_data) +flip_sink_open (struct case_sink *sink) { - if (src || (src == NULL && src_file == NULL)) - { - /* Internal transposition, or empty file. */ - int i, j; - char *p = src_old_names; - - for (i = 0; i < nvar; i++) - { - struct flip_case *iter; - - st_bare_pad_copy (temp_case->data[0].s, p, 8); - p = strchr (p, 0) + 1; - - for (iter = src, j = 1; iter; iter = iter->next, j++) - temp_case->data[j].f = iter->v[i]; + struct flip_sink_info *info = sink->aux; - if (!write_case (wc_data)) - return; - } - } - else - { - int i; - char *p = src_old_names; - - for (i = 0; i < nvar; i++) - { - st_bare_pad_copy (temp_case->data[0].s, p, 8); - p = strchr (p, 0) + 1; - - if (fread (&temp_case->data[1], sizeof (double), src_cases, - src_file) != src_cases) - msg (FE, _("Error reading FLIP source file: %s."), - strerror (errno)); - - if (!write_case (wc_data)) - return; - } - } + info->file = tmpfile (); + if (!info->file) + msg (FE, _("Could not create temporary file for FLIP.")); } -/* Writes temp_case to the FLIP stream. */ +/* Writes case C to the FLIP sink. */ static void -flip_stream_write (void) +flip_sink_write (struct case_sink *sink, struct ccase *c) { - sink_cases++; + struct flip_sink_info *info = sink->aux; + struct flip_pgm *flip = info->flip; + + info->case_cnt++; - if (newnames) + if (flip->newnames) { struct varname *v = xmalloc (sizeof (struct varname)); - if (newnames->type == NUMERIC) + v->next = NULL; + if (flip->newnames->type == NUMERIC) { - double f = temp_case->data[newnames->fv].f; + double f = c->data[flip->newnames->fv].f; if (f == SYSMIS) strcpy (v->name, "VSYSMIS"); @@ -328,236 +321,205 @@ flip_stream_write (void) } else { - int width = min (newnames->width, 8); - memcpy (v->name, temp_case->data[newnames->fv].s, width); + int width = min (flip->newnames->width, 8); + memcpy (v->name, c->data[flip->newnames->fv].s, width); v->name[width] = 0; } - if (new_names_tail == NULL) - new_names_head = v; + if (flip->new_names_head == NULL) + flip->new_names_head = v; else - new_names_tail->next = v; - new_names_tail = v; + flip->new_names_tail->next = v; + flip->new_names_tail = v; } else - case_count++; + flip->case_count++; - if (internal) - { -#if 0 - flip_case *c = malloc (sizeof (flip_case) - + sizeof (double) * (nvar - 1)); - - if (c != NULL) - { - /* Write to internal file. */ - int i; - - for (i = 0; i < nvar; i++) - if (var[i]->type == NUMERIC) - c->v[i] = temp_case->data[var[i]->fv].f; - else - c->v[i] = SYSMIS; - - if (tail == NULL) - head = c; - else - tail->next = c; - tail = c; - - return; - } - else -#endif - { - /* Initialize external file. */ - struct flip_case *iter, *next; - - internal = 0; - - sink_file = tmpfile (); - if (!sink_file) - msg (FE, _("Could not create temporary file for FLIP.")); - - if (tail) - tail->next = NULL; - for (iter = head; iter; iter = next) - { - next = iter->next; - - if (fwrite (iter->v, sizeof (double), nvar, sink_file) - != (size_t) nvar) - msg (FE, _("Error writing FLIP file: %s."), - strerror (errno)); - free (iter); - } - } - } /* Write to external file. */ { - double *d = local_alloc (sizeof *d * nvar); + double *d = local_alloc (sizeof *d * flip->var_cnt); int i; - for (i = 0; i < nvar; i++) - if (var[i]->type == NUMERIC) - d[i] = temp_case->data[var[i]->fv].f; + for (i = 0; i < flip->var_cnt; i++) + if (flip->var[i]->type == NUMERIC) + d[i] = c->data[flip->var[i]->fv].f; else d[i] = SYSMIS; - if (fwrite (d, sizeof *d, nvar, sink_file) != (size_t) nvar) + if (fwrite (d, sizeof *d, flip->var_cnt, info->file) != (size_t) flip->var_cnt) msg (FE, _("Error writing FLIP file: %s."), strerror (errno)); local_free (d); } } - -/* Transpose the external file. */ + +/* Destroy sink's internal data. */ static void -transpose_external_file (void) +flip_sink_destroy (struct case_sink *sink) { - unsigned long n_cases; - unsigned long cur_case; - double *case_buf, *temp_buf; + struct flip_sink_info *info = sink->aux; + + free (info->old_names); + destroy_flip_pgm (info->flip); + free (info); +} + +/* Convert the FLIP sink into a source. */ +static struct case_source * +flip_sink_make_source (struct case_sink *sink) +{ + struct flip_sink_info *sink_info = sink->aux; + struct flip_source_info *source_info; + + source_info = xmalloc (sizeof *source_info); + source_info->flip = sink_info->flip; + source_info->old_names = sink_info->old_names; + source_info->case_cnt = sink_info->case_cnt; + source_info->file = flip_file (sink_info); + fclose (sink_info->file); + + free (sink_info); - n_cases = 4 * 1024 * 1024 / ((nvar + 1) * sizeof *case_buf); - if (n_cases < 2) - n_cases = 2; + return create_case_source (&flip_source_class, source_info); +} + +/* Transposes the external file into a new file and returns a + pointer to the transposed file. */ +static FILE * +flip_file (struct flip_sink_info *info) +{ + struct flip_pgm *flip = info->flip; + size_t case_bytes; + size_t case_capacity; + size_t case_idx; + union value *input_buf, *output_buf; + FILE *input_file, *output_file; + + /* Allocate memory for many cases. */ + case_bytes = flip->var_cnt * sizeof *input_buf; + case_capacity = set_max_workspace / case_bytes; + if (case_capacity > info->case_cnt) + case_capacity = info->case_cnt; + if (case_capacity < 2) + case_capacity = 2; for (;;) { - assert (n_cases >= 2 /* 1 */); - case_buf = ((n_cases <= 2 ? xmalloc : (void *(*)(size_t)) malloc) - ((nvar + 1) * sizeof *case_buf * n_cases)); - if (case_buf) + size_t bytes = case_bytes * case_capacity; + if (case_capacity > 2) + input_buf = malloc (bytes); + else + input_buf = xmalloc (bytes); + if (input_buf != NULL) break; - n_cases /= 2; - if (n_cases < 2) - n_cases = 2; + case_capacity /= 2; + if (case_capacity < 2) + case_capacity = 2; } - /* A temporary buffer that holds n_cases elements. */ - temp_buf = &case_buf[nvar * n_cases]; + /* Use half the allocated memory for input_buf, half for + output_buf. */ + case_capacity /= 2; + output_buf = input_buf + flip->var_cnt * case_capacity; - src_file = tmpfile (); - if (!src_file) - msg (FE, _("Error creating FLIP source file.")); - - if (fseek (sink_file, 0, SEEK_SET) != 0) + input_file = info->file; + if (fseek (input_file, 0, SEEK_SET) != 0) msg (FE, _("Error rewinding FLIP file: %s."), strerror (errno)); - for (cur_case = 0; cur_case < sink_cases; ) + output_file = tmpfile (); + if (output_file == NULL) + msg (FE, _("Error creating FLIP source file.")); + + for (case_idx = 0; case_idx < info->case_cnt; ) { - unsigned long read_cases = min (sink_cases - cur_case, n_cases); + unsigned long read_cases = min (info->case_cnt - case_idx, + case_capacity); int i; - if (read_cases != fread (case_buf, sizeof *case_buf * nvar, - read_cases, sink_file)) + if (read_cases != fread (input_buf, case_bytes, read_cases, input_file)) msg (FE, _("Error reading FLIP file: %s."), strerror (errno)); - for (i = 0; i < nvar; i++) + for (i = 0; i < flip->var_cnt; i++) { unsigned long j; for (j = 0; j < read_cases; j++) - temp_buf[j] = case_buf[i + j * nvar]; + output_buf[j] = input_buf[i + j * flip->var_cnt]; - if (fseek (src_file, - sizeof *case_buf * (cur_case + i * sink_cases), - SEEK_SET) != 0) + if (fseek (output_file, + sizeof *input_buf * (case_idx + i * info->case_cnt), + SEEK_SET) != 0) msg (FE, _("Error seeking FLIP source file: %s."), strerror (errno)); - if (fwrite (temp_buf, sizeof *case_buf, read_cases, src_file) + if (fwrite (output_buf, sizeof *output_buf, read_cases, output_file) != read_cases) msg (FE, _("Error writing FLIP source file: %s."), strerror (errno)); } - cur_case += read_cases; + case_idx += read_cases; } - if (fseek (src_file, 0, SEEK_SET) != 0) + if (fseek (output_file, 0, SEEK_SET) != 0) msg (FE, _("Error rewind FLIP source file: %s."), strerror (errno)); - fclose (sink_file); - - free (case_buf); + free (input_buf); + return output_file; } -/* Change the FLIP stream from sink to source mode. */ -static void -flip_stream_mode (void) -{ - src_cases = sink_cases; - src_old_names = sink_old_names; - sink_old_names = NULL; - - if (internal) - { - if (tail) - { - tail->next = NULL; - src = head; - } - else - { - src = NULL; - src_file = NULL; - } - } - else - { - src = NULL; - transpose_external_file (); - } -} +/* FLIP sink class. */ +static const struct case_sink_class flip_sink_class = + { + "FLIP", + flip_sink_open, + flip_sink_write, + flip_sink_destroy, + flip_sink_make_source, + }; -/* Destroy source's internal data. */ +/* Reads the FLIP stream and passes it to WRITE_CASE(). */ static void -flip_stream_destroy_source (void) +flip_source_read (struct case_source *source, + write_case_func *write_case, write_case_data wc_data) { - free (src_old_names); - if (internal) + struct flip_source_info *info = source->aux; + struct flip_pgm *flip = info->flip; + int i; + char *p = info->old_names; + + for (i = 0; i < flip->var_cnt; i++) { - struct flip_case *iter, *next; + st_bare_pad_copy (temp_case->data[0].s, p, 8); + p = strchr (p, 0) + 1; - for (iter = src; iter; iter = next) - { - next = iter->next; - free (iter); - } + if (fread (&temp_case->data[1], sizeof (double), info->case_cnt, + info->file) != info->case_cnt) + msg (FE, _("Error reading FLIP source file: %s."), + strerror (errno)); + + if (!write_case (wc_data)) + return; } - else - fclose (src_file); } -/* Destroy sink's internal data. */ +/* Destroy source's internal data. */ static void -flip_stream_destroy_sink (void) +flip_source_destroy (struct case_source *source) { - struct flip_case *iter, *next; - - free (sink_old_names); - if (tail == NULL) - return; + struct flip_source_info *info = source->aux; - tail->next = NULL; - for (iter = head; iter; iter = next) - { - next = iter->next; - free (iter); - } + destroy_flip_pgm (info->flip); + free (info->old_names); + fclose (info->file); + free (info); } -struct case_stream flip_stream = +static const struct case_source_class flip_source_class = { - flip_stream_init, - flip_stream_read, - flip_stream_write, - flip_stream_mode, - flip_stream_destroy_source, - flip_stream_destroy_sink, "FLIP", + flip_source_read, + flip_source_destroy }; diff --git a/src/format.c b/src/format.c index bed72418..33a81ff3 100644 --- a/src/format.c +++ b/src/format.c @@ -340,3 +340,13 @@ parse_format_specifier (struct fmt_spec *input, int allow_xt) return 1; } +int +get_format_var_width (const struct fmt_spec *spec) +{ + if (spec->type == FMT_AHEX) + return spec->w * 2; + else if (spec->type == FMT_A) + return spec->w; + else + return 0; +} diff --git a/src/format.h b/src/format.h index 97fe0c99..7f4b5442 100644 --- a/src/format.h +++ b/src/format.h @@ -85,6 +85,7 @@ int check_input_specifier (const struct fmt_spec *spec); int check_output_specifier (const struct fmt_spec *spec); int check_string_specifier (const struct fmt_spec *spec, int min_len); void convert_fmt_ItoO (const struct fmt_spec *input, struct fmt_spec *output); +int get_format_var_width (const struct fmt_spec *); int parse_string_as_format (const char *s, int len, const struct fmt_spec *fp, int fc, union value *v); void data_out (char *s, const struct fmt_spec *fp, const union value *v); diff --git a/src/frequencies.q b/src/frequencies.q index 44393a89..0d896b47 100644 --- a/src/frequencies.q +++ b/src/frequencies.q @@ -580,6 +580,7 @@ cleanup_freq_tab (struct variable *v) { assert (v->p.frq.tab.mode == FRQM_GENERAL); free (v->p.frq.tab.valid); + hsh_destroy (v->p.frq.tab.data); } /* Parses the VARIABLES subcommand, adding to diff --git a/src/get.c b/src/get.c index 2a848d54..8cd3cf78 100644 --- a/src/get.c +++ b/src/get.c @@ -54,12 +54,6 @@ struct save_trns #define GTSV_OPT_MATCH_FILES 004 /* The MATCH FILES procedure. */ #define GTSV_OPT_NONE 0 -/* The file being read by the input program. */ -static struct file_handle *get_file; - -/* The transformation being used by the SAVE procedure. */ -static struct save_trns *trns; - static int trim_dictionary (struct dictionary * dict, int *options); static int save_write_case_func (struct ccase *, void *); static int save_trns_proc (struct trns_header *, struct ccase *); @@ -120,16 +114,21 @@ cmd_get (void) dict_destroy (default_dict); default_dict = dict; - vfm_source = &get_source; - get_file = handle; + vfm_source = create_case_source (&get_source_class, handle); return CMD_SUCCESS; } -/* Parses the SAVE (for XSAVE==0) and XSAVE (for XSAVE==1) - commands. */ +/* SAVE or XSAVE command? */ +enum save_cmd + { + CMD_SAVE, + CMD_XSAVE + }; + +/* Parses the SAVE and XSAVE commands. */ static int -cmd_save_internal (int xsave) +cmd_save_internal (enum save_cmd save_cmd) { struct file_handle *handle; struct dictionary *dict; @@ -178,7 +177,7 @@ cmd_save_internal (int xsave) } /* Fill in transformation structure. */ - t = trns = xmalloc (sizeof *t); + t = xmalloc (sizeof *t); t->h.proc = save_trns_proc; t->h.free = save_trns_free; t->f = handle; @@ -189,15 +188,16 @@ cmd_save_internal (int xsave) t->case_buf = xmalloc (sizeof *t->case_buf * inf.case_size); dict_destroy (dict); - if (xsave == 0) - /* SAVE. */ + if (save_cmd == CMD_SAVE) { - procedure (NULL, save_write_case_func, NULL, NULL); + procedure (NULL, save_write_case_func, NULL, t); save_trns_free (&t->h); } - else - /* XSAVE. */ - add_transformation (&t->h); + else + { + assert (save_cmd == CMD_XSAVE); + add_transformation (&t->h); + } return CMD_SUCCESS; } @@ -206,32 +206,26 @@ cmd_save_internal (int xsave) int cmd_save (void) { - return cmd_save_internal (0); + return cmd_save_internal (CMD_SAVE); } /* Parses the XSAVE transformation command. */ int cmd_xsave (void) { - return cmd_save_internal (1); + return cmd_save_internal (CMD_XSAVE); } -static int -save_write_case_func (struct ccase * c, void *aux UNUSED) -{ - save_trns_proc (&trns->h, c); - return 1; -} - -static int -save_trns_proc (struct trns_header * t UNUSED, struct ccase * c) +/* Writes the given C to the file specified by T. */ +static void +do_write_case (struct save_trns *t, struct ccase *c) { - flt64 *p = trns->case_buf; + flt64 *p = t->case_buf; int i; - for (i = 0; i < trns->nvar; i++) + for (i = 0; i < t->nvar; i++) { - struct variable *v = trns->var[i]; + struct variable *v = t->var[i]; if (v->type == NUMERIC) { double src = c->data[v->fv].f; @@ -249,7 +243,21 @@ save_trns_proc (struct trns_header * t UNUSED, struct ccase * c) } } - sfm_write_case (trns->f, trns->case_buf, p - trns->case_buf); + sfm_write_case (t->f, t->case_buf, p - t->case_buf); +} + +static int +save_write_case_func (struct ccase * c, void *aux UNUSED) +{ + do_write_case (aux, c); + return 1; +} + +static int +save_trns_proc (struct trns_header *h, struct ccase * c) +{ + struct save_trns *t = (struct save_trns *) h; + do_write_case (t, c); return -1; } @@ -467,33 +475,33 @@ dump_dict_variables (struct dictionary * dict) /* Clears internal state related to GET input procedure. */ static void -get_source_destroy_source (void) +get_source_destroy (struct case_source *source) { + struct file_handle *handle = source->aux; + /* It is not necessary to destroy the dictionary because if we get to this point then the dictionary is default_dict. */ - fh_close_handle (get_file); + fh_close_handle (handle); } /* Reads all the cases from the data file and passes them to write_case(). */ static void -get_source_read (write_case_func *write_case, write_case_data wc_data) +get_source_read (struct case_source *source, + write_case_func *write_case, write_case_data wc_data) { - while (sfm_read_case (get_file, temp_case->data, default_dict) + struct file_handle *handle = source->aux; + + while (sfm_read_case (handle, temp_case->data, default_dict) && write_case (wc_data)) ; - get_source_destroy_source (); } -struct case_stream get_source = +const struct case_source_class get_source_class = { - NULL, - get_source_read, - NULL, - NULL, - get_source_destroy_source, - NULL, "GET", + get_source_read, + get_source_destroy, }; @@ -1402,8 +1410,7 @@ cmd_import (void) dict_destroy (default_dict); default_dict = dict; - vfm_source = &import_source; - get_file = handle; + vfm_source = create_case_source (&import_source_class, handle); return CMD_SUCCESS; } @@ -1411,23 +1418,20 @@ cmd_import (void) /* Reads all the cases from the data file and passes them to write_case(). */ static void -import_source_read (write_case_func *write_case, write_case_data wc_data) +import_source_read (struct case_source *source, + write_case_func *write_case, write_case_data wc_data) { - while (pfm_read_case (get_file, temp_case->data, default_dict) - && write_case (wc_data)) - ; - get_source_destroy_source (); + struct file_handle *handle = source->aux; + while (pfm_read_case (handle, temp_case->data, default_dict)) + if (!write_case (wc_data)) + break; } -struct case_stream import_source = +const struct case_source_class import_source_class = { - NULL, - import_source_read, - NULL, - NULL, - get_source_destroy_source, - NULL, "IMPORT", + import_source_read, + get_source_destroy, }; static int export_write_case_func (struct ccase *c, void *); @@ -1480,7 +1484,7 @@ cmd_export (void) } /* Fill in transformation structure. */ - t = trns = xmalloc (sizeof *t); + t = xmalloc (sizeof *t); t->h.proc = save_trns_proc; t->h.free = save_trns_free; t->f = handle; @@ -1491,21 +1495,22 @@ cmd_export (void) t->case_buf = xmalloc (sizeof *t->case_buf * t->nvar); dict_destroy (dict); - procedure (NULL, export_write_case_func, NULL, NULL); + procedure (NULL, export_write_case_func, NULL, t); save_trns_free (&t->h); return CMD_SUCCESS; } static int -export_write_case_func (struct ccase *c, void *aux UNUSED) +export_write_case_func (struct ccase *c, void *aux) { - union value *p = (union value *) trns->case_buf; + struct save_trns *t = aux; + union value *p = (union value *) t->case_buf; int i; - for (i = 0; i < trns->nvar; i++) + for (i = 0; i < t->nvar; i++) { - struct variable *v = trns->var[i]; + struct variable *v = t->var[i]; if (v->type == NUMERIC) *p++ = c->data[v->fv]; @@ -1513,6 +1518,6 @@ export_write_case_func (struct ccase *c, void *aux UNUSED) (*p++).c = c->data[v->fv].s; } - pfm_write_case (trns->f, (union value *) trns->case_buf); + pfm_write_case (t->f, (union value *) t->case_buf); return 1; } diff --git a/src/glob.c b/src/glob.c index cc76e340..5bf5600f 100644 --- a/src/glob.c +++ b/src/glob.c @@ -111,9 +111,6 @@ int FILTER_before_TEMPORARY; struct file_handle *default_handle; -void (*read_active_file) (void); -void (*cancel_input_pgm) (void); - struct ctl_stmt *ctl_stack; /* log.h */ diff --git a/src/heap.c b/src/heap.c deleted file mode 100644 index c321327a..00000000 --- a/src/heap.c +++ /dev/null @@ -1,269 +0,0 @@ -/* PSPP - computes sample statistics. - Copyright (C) 1997-9, 2000 Free Software Foundation, Inc. - Written by Ben Pfaff . - - This program is free software; you can redistribute it and/or - modify it under the terms of the GNU General Public License as - published by the Free Software Foundation; either version 2 of the - License, or (at your option) any later version. - - This program is distributed in the hope that it will be useful, but - WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - General Public License for more details. - - You should have received a copy of the GNU General Public License - along with this program; if not, write to the Free Software - Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA - 02111-1307, USA. */ - -#include -#include "heap.h" -#include -#include -#include - -#if STANDALONE -#define GLOBAL_DEBUGGING 1 -#define _(x) (x) -#endif - -/* Creates and returns a heap with an initial capacity of M_ELEM - elements. Returns nonzero only if successful. */ -struct heap * -heap_create (size_t m_elem) -{ - struct heap *h = malloc (sizeof *h); - if (h != NULL) - { - h->n_elem = 0; - h->m_elem = m_elem; - h->elem = malloc (h->m_elem * sizeof *h->elem); - if (h->elem == NULL) - { - free (h); - h = NULL; - } - } - return h; -} - -/* Destroys the heap at *H. */ -void -heap_destroy (struct heap *h) -{ - assert (h != NULL); - free (h->elem); - free (h); -} - -/* Inserts into heap *H an element having index INDEX and key KEY. - Returns nonzero only if successful. */ -int -heap_insert (struct heap *h, int index, int key) -{ - int i, j; - - assert (h != NULL); - if (h->n_elem >= h->m_elem) - { - h->elem = realloc (h->elem, 2 * h->m_elem * sizeof *h->elem); - if (h->elem == NULL) - return 0; - h->m_elem *= 2; - } - - /* Knuth's Algorithm 5.2.3-16. Step 1. */ - j = h->n_elem + 1; - - for (;;) - { - /* Step 2. */ - i = j / 2; - - /* Step 3. */ - if (i == 0 || h->elem[i - 1].key <= key) - { - h->elem[j - 1].index = index; - h->elem[j - 1].key = key; - h->n_elem++; - return 1; - } - - /* Step 4. */ - h->elem[j - 1] = h->elem[i - 1]; - j = i; - } -} - -/* Deletes the first element in the heap (the one with the greatest - index) and returns its index, or -1 if the heap is empty. If KEY - is non-NULL then *KEY is set to the deleted element's key, if it - returns non-NULL. */ -int -heap_delete (struct heap *h, int *key) -{ - /* Knuth's Algorithm 5.2.3H-19. */ - int first, K, R, l, r, i, j; - - if (h->n_elem == 0) - return -1; - first = h->elem[0].index; - if (key) - *key = h->elem[0].key; - K = h->elem[h->n_elem - 1].key; - R = h->elem[h->n_elem - 1].index; - l = 1; - r = h->n_elem - 1; - - /* H3. */ - j = 1; - -H4: - i = j; - j *= 2; - if (j == r) - goto H6; - else if (j > r) - goto H8; - - /* H5. */ - if (h->elem[j - 1].key > h->elem[j].key) - j++; - -H6: - if (K <= h->elem[j - 1].key) - goto H8; - - /* H7. */ - h->elem[i - 1] = h->elem[j - 1]; - goto H4; - -H8: - h->elem[i - 1].key = K; - h->elem[i - 1].index = R; - - h->n_elem--; - return first; -} - -/* Returns the number of elements in heap H. */ -int -heap_size (struct heap *h) -{ - return h->n_elem; -} - -#if GLOBAL_DEBUGGING -/* Checks that a heap is really a heap. */ -void -heap_verify (const struct heap *h) -{ - size_t j; - - for (j = 1; j <= h->n_elem; j++) - { - if (j / 2 >= 1 && h->elem[j / 2 - 1].key > h->elem[j - 1].key) - printf (_("bad ordering of keys %d and %d\n"), j / 2 - 1, j - 1); - } -} - -/* Dumps out the heap on stdout. */ -void -heap_dump (const struct heap *h) -{ - size_t j; - - printf (_("Heap contents:\n")); - for (j = 1; j <= h->n_elem; j++) - { - int partner; - if (j / 2 >= 1) - partner = h->elem[j / 2 - 1].key; - else - partner = -1; - printf ("%6d-%5d", h->elem[j - 1].key, partner); - } -} -#endif /* GLOBAL_DEBUGGING */ - -#if STANDALONE -#include - -/* To perform a fairly thorough test of the heap routines, define - STANDALONE to nonzero then compile this file by itself. */ - -/* Compares the second elements of the integer arrays at _A and _B and - returns a strcmp()-type result. */ -int -compare_int2 (const void *pa, const void *pb) -{ - int *a = (int *) pa; - int *b = (int *) pb; - - return a[1] - b[1]; -} - -#define N_ELEM 16 - -/* Arrange the N elements of ARRAY in random order. */ -void -shuffle (int (*array)[2], int n) -{ - int i; - - for (i = 0; i < n; i++) - { - int j = i + rand () % (n - i); - int t = array[j][0], s = array[j][1]; - array[j][0] = array[i][0], array[j][1] = array[i][1]; - array[i][0] = t, array[i][1] = s; - } -} - -/* Test routine. */ -int -main (void) -{ - struct heap *h; - int i; - int array[N_ELEM][2]; - - srand (time (0)); - - h = heap_create (16); - for (i = 0; i < N_ELEM; i++) - { - array[i][0] = i; - array[i][1] = N_ELEM - i - 1; - } - shuffle (array, N_ELEM); - - printf ("Insertion order:\n"); - for (i = 0; i < N_ELEM; i++) - { - printf ("(%d,%d) ", array[i][0], array[i][1]); - heap_insert (h, array[i][0], array[i][1]); - heap_verify (h); - } - putchar ('\n'); - - /*heap_dump(&h); */ - - printf ("\nDeletion order:\n"); - for (i = 0; i < N_ELEM; i++) - { - int index, key; - index = heap_delete (h, &key); - assert (index != -1); - printf ("(%d,%d) ", index, key); - fflush (stdout); - assert (index == N_ELEM - i - 1 && key == i); - heap_verify (h); - } - putchar ('\n'); - heap_destroy (h); - - return 0; -} -#endif diff --git a/src/heap.h b/src/heap.h deleted file mode 100644 index f2dc8697..00000000 --- a/src/heap.h +++ /dev/null @@ -1,54 +0,0 @@ -/* PSPP - computes sample statistics. - Copyright (C) 1997-9, 2000 Free Software Foundation, Inc. - Written by Ben Pfaff . - - This program is free software; you can redistribute it and/or - modify it under the terms of the GNU General Public License as - published by the Free Software Foundation; either version 2 of the - License, or (at your option) any later version. - - This program is distributed in the hope that it will be useful, but - WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - General Public License for more details. - - You should have received a copy of the GNU General Public License - along with this program; if not, write to the Free Software - Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA - 02111-1307, USA. */ - -#if !heap_h -#define heap_h 1 - -/* This module implements a priority queue as a heap as described in - Knuth 5.2.3. This is a first-in-smallest-out priority queue. */ - -#include - -/* One element of a heap. */ -struct heap_elem - { - int index; /* Data. */ - int key; /* Key value. */ - }; - -/* An entire heap. */ -struct heap - { - size_t n_elem; /* Number of elements in heap. */ - size_t m_elem; /* Number of elements allocated for heap. */ - struct heap_elem *elem; /* Heap elements. */ - }; - -struct heap *heap_create (size_t m_elem); -void heap_destroy (struct heap *); -int heap_insert (struct heap *, int index, int key); -int heap_delete (struct heap *, int *key); -int heap_size (struct heap *); - -#if GLOBAL_DEBUGGING -void heap_verify (const struct heap *); -void heap_dump (const struct heap *); -#endif - -#endif /* heap_h */ diff --git a/src/inpt-pgm.c b/src/inpt-pgm.c index 425c9362..5acf2df4 100644 --- a/src/inpt-pgm.c +++ b/src/inpt-pgm.c @@ -46,12 +46,11 @@ enum value_init_type INP_REINIT = 0, /* Reinitialize for each iteration. */ }; -/* Array that tells INPUT PROGRAM how to initialize each `union - value'. */ -static enum value_init_type *inp_init; - -/* Number of bytes allocated for inp_init. */ -static size_t inp_nval; +struct input_program_pgm + { + enum value_init_type *init; /* How to initialize each `union value'. */ + size_t init_cnt; /* Number of elements in inp_init. */ + }; static int end_case_trns_proc (struct trns_header *, struct ccase *); static int end_file_trns_proc (struct trns_header * t, struct ccase * c); @@ -65,7 +64,7 @@ cmd_input_program (void) lex_match_id ("PROGRAM"); discard_variables (); - vfm_source = &input_program_source; + vfm_source = create_case_source (&input_program_source_class, NULL); return lex_end_of_command (); } @@ -73,13 +72,14 @@ cmd_input_program (void) int cmd_end_input_program (void) { + struct input_program_pgm *inp; size_t i; lex_match_id ("END"); lex_match_id ("INPUT"); lex_match_id ("PROGRAM"); - if (vfm_source != &input_program_source) + if (!case_source_is_class (vfm_source, &input_program_source_class)) { msg (SE, _("No matching INPUT PROGRAM command.")); return CMD_FAILURE; @@ -89,15 +89,16 @@ cmd_end_input_program (void) msg (SW, _("No data-input or transformation commands specified " "between INPUT PROGRAM and END INPUT PROGRAM.")); - /* Mark the boundary between INPUT PROGRAM and more-mundane - transformations. */ + /* Mark the boundary between INPUT PROGRAM transformations and + ordinary transformations. */ f_trns = n_trns; /* Figure out how to initialize temp_case. */ - inp_nval = dict_get_next_value_idx (default_dict); - inp_init = xmalloc (inp_nval * sizeof *inp_init); - for (i = 0; i < inp_nval; i++) - inp_init[i] = -1; + inp = xmalloc (sizeof *inp); + inp->init_cnt = dict_get_next_value_idx (default_dict); + inp->init = xmalloc (inp->init_cnt * sizeof *inp->init); + for (i = 0; i < inp->init_cnt; i++) + inp->init[i] = -1; for (i = 0; i < dict_get_var_cnt (default_dict); i++) { struct variable *var = dict_get_var (default_dict, i); @@ -108,22 +109,25 @@ cmd_end_input_program (void) value_init |= var->reinit ? INP_REINIT : INP_INIT_ONCE; for (j = 0; j < var->nv; j++) - inp_init[j + var->fv] = value_init; + inp->init[j + var->fv] = value_init; } - for (i = 0; i < inp_nval; i++) - assert (inp_init[i] != -1); + for (i = 0; i < inp->init_cnt; i++) + assert (inp->init[i] != -1); + + /* Put inp into vfm_source for later use. */ + vfm_source->aux = inp; return lex_end_of_command (); } /* Initializes temp_case. Called before the first case is read. */ static void -init_case (void) +init_case (struct input_program_pgm *inp) { size_t i; - for (i = 0; i < inp_nval; i++) - switch (inp_init[i]) + for (i = 0; i < inp->init_cnt; i++) + switch (inp->init[i]) { case INP_NUMERIC | INP_INIT_ONCE: temp_case->data[i].f = 0.0; @@ -142,12 +146,12 @@ init_case (void) /* Clears temp_case. Called between reading successive records. */ static void -clear_case (void) +clear_case (struct input_program_pgm *inp) { size_t i; - for (i = 0; i < inp_nval; i++) - switch (inp_init[i]) + for (i = 0; i < inp->init_cnt; i++) + switch (inp->init[i]) { case INP_NUMERIC | INP_INIT_ONCE: break; @@ -169,17 +173,21 @@ clear_case (void) file. -1 means go on to the next transformation. Otherwise the return value is the index of the transformation to go to next. */ static void -input_program_source_read (write_case_func *write_case, +input_program_source_read (struct case_source *source, + write_case_func *write_case, write_case_data wc_data) { + struct input_program_pgm *inp = source->aux; int i; /* Nonzero if there were any END CASE commands in the set of - transformations. */ + transformations. If so, we don't automatically write out + cases. */ int end_case = 0; - /* We don't automatically write out cases if the user took over - that prerogative. */ + assert (inp != NULL); + + /* Figure end_case. */ for (i = 0; i < f_trns; i++) if (t_trns[i]->proc == end_case_trns_proc) end_case = 1; @@ -190,7 +198,7 @@ input_program_source_read (write_case_func *write_case, if (t_trns[i]->proc == repeating_data_trns_proc) repeating_data_set_write_case (t_trns[i], write_case, wc_data); - init_case (); + init_case (inp); for (;;) { /* Index of current transformation. */ @@ -214,7 +222,7 @@ input_program_source_read (write_case_func *write_case, { if (!write_case (wc_data)) return; - clear_case (); + clear_case (inp); i++; continue; } @@ -247,27 +255,29 @@ input_program_source_read (write_case_func *write_case, /* Blank out the case for the next iteration. */ next_case: - clear_case (); + clear_case (inp); } } static void -input_program_source_destroy_source (void) +input_program_source_destroy (struct case_source *source) { + struct input_program_pgm *inp = source->aux; + cancel_transformations (); - free (inp_init); - inp_init = NULL; + + if (inp != NULL) + { + free (inp->init); + free (inp); + } } -struct case_stream input_program_source = +const struct case_source_class input_program_source_class = { - NULL, - input_program_source_read, - NULL, - NULL, - input_program_source_destroy_source, - NULL, "INPUT PROGRAM", + input_program_source_read, + input_program_source_destroy, }; int @@ -278,7 +288,7 @@ cmd_end_case (void) lex_match_id ("END"); lex_match_id ("CASE"); - if (vfm_source != &input_program_source) + if (!case_source_is_class (vfm_source, &input_program_source_class)) { msg (SE, _("This command may only be executed between INPUT PROGRAM " "and END INPUT PROGRAM.")); @@ -415,7 +425,7 @@ cmd_end_file (void) lex_match_id ("END"); lex_match_id ("FILE"); - if (vfm_source != &input_program_source) + if (!case_source_is_class (vfm_source, &input_program_source_class)) { msg (SE, _("This command may only be executed between INPUT PROGRAM " "and END INPUT PROGRAM.")); diff --git a/src/levene.c b/src/levene.c index 9f2d3b19..67e52d38 100644 --- a/src/levene.c +++ b/src/levene.c @@ -29,6 +29,7 @@ #include "stats.h" #include +#include /* This module calculates the Levene statistic for variables. diff --git a/src/matrix-data.c b/src/matrix-data.c index 9502c575..7c3411bc 100644 --- a/src/matrix-data.c +++ b/src/matrix-data.c @@ -42,14 +42,14 @@ too. */ /* Format type enums. */ -enum +enum format_type { LIST, FREE }; /* Matrix section enums. */ -enum +enum matrix_section { LOWER, UPPER, @@ -57,14 +57,14 @@ enum }; /* Diagonal inclusion enums. */ -enum +enum include_diagonal { DIAGONAL, NODIAGONAL }; /* CONTENTS types. */ -enum +enum content_type { N_VECTOR, N_SCALAR, @@ -85,7 +85,7 @@ enum }; /* 0=vector, 1=matrix, 2=scalar. */ -static int content_type[PROX + 1] = +static const int content_type[PROX + 1] = { 0, 2, 1, 0, 0, 0, 0, 0, 1, 1, 1, 1, }; @@ -97,53 +97,48 @@ static const char *content_names[PROX + 1] = "DFE", "MAT", "COV", "CORR", "PROX", }; -/* The data file to be read. */ -static struct file_handle *data_file; - -/* Format type. */ -static int fmt; /* LIST or FREE. */ -static int section; /* LOWER or UPPER or FULL. */ -static int diag; /* DIAGONAL or NODIAGONAL. */ - -/* Arena used for all the MATRIX DATA allocations. */ -static struct pool *container; - -/* ROWTYPE_ specified explicitly in data? */ -static int explicit_rowtype; - -/* ROWTYPE_, VARNAME_ variables. */ -static struct variable *rowtype_, *varname_; +/* A MATRIX DATA input program. */ +struct matrix_data_pgm + { + struct pool *container; /* Arena used for all allocations. */ + struct file_handle *data_file; /* The data file to be read. */ -/* Is is per-factor data? */ -int is_per_factor[PROX + 1]; + /* Format. */ + enum format_type fmt; /* LIST or FREE. */ + enum matrix_section section;/* LOWER or UPPER or FULL. */ + enum include_diagonal diag; /* DIAGONAL or NODIAGONAL. */ -/* Single SPLIT FILE variable. */ -static struct variable *single_split; + int explicit_rowtype; /* ROWTYPE_ specified explicitly in data? */ + struct variable *rowtype_, *varname_; /* ROWTYPE_, VARNAME_ variables. */ + + struct variable *single_split; /* Single SPLIT FILE variable. */ -/* Factor variables. */ -static int n_factors; -static struct variable **factors; + /* Factor variables. */ + int n_factors; /* Number of factor variables. */ + struct variable **factors; /* Factor variables. */ + int is_per_factor[PROX + 1]; /* Is there per-factor data? */ -/* Number of cells, or -1 if none. */ -static int cells; + int cells; /* Number of cells, or -1 if none. */ -/* Population N specified by user. */ -static int pop_n; + int pop_n; /* Population N specified by user. */ -/* CONTENTS subcommand. */ -static int contents[EOC * 3 + 1]; -static int n_contents; + /* CONTENTS subcommand. */ + int contents[EOC * 3 + 1]; /* Contents. */ + int n_contents; /* Number of entries. */ -/* Number of continuous variables. */ -static int n_continuous; + + int n_continuous; /* Number of continuous variables. */ + int first_continuous; /* Index into default_dict.var of + first continuous variable. */ + }; -/* Index into default_dict.var of first continuous variables. */ -static int first_continuous; +static const struct case_source_class matrix_data_with_rowtype_source_class; +static const struct case_source_class matrix_data_without_rowtype_source_class; static int compare_variables_by_mxd_vartype (const void *pa, const void *pb); -static void read_matrices_without_rowtype (void); -static void read_matrices_with_rowtype (void); +static void read_matrices_without_rowtype (struct matrix_data_pgm *); +static void read_matrices_with_rowtype (struct matrix_data_pgm *); static int string_to_content_type (char *, int *); #if DEBUGGING @@ -153,25 +148,35 @@ static void debug_print (void); int cmd_matrix_data (void) { + struct pool *pool; + struct matrix_data_pgm *mx; + unsigned seen = 0; lex_match_id ("MATRIX"); lex_match_id ("DATA"); - container = pool_create (); - discard_variables (); - data_file = inline_file; - fmt = LIST; - section = LOWER; - diag = DIAGONAL; - single_split = NULL; - n_factors = 0; - factors = NULL; - cells = -1; - pop_n = -1; - n_contents = 0; + pool = pool_create (); + mx = pool_alloc (pool, sizeof *mx); + mx->container = pool; + mx->data_file = inline_file; + mx->fmt = LIST; + mx->section = LOWER; + mx->diag = DIAGONAL; + mx->explicit_rowtype = 0; + mx->rowtype_ = NULL; + mx->varname_ = NULL; + mx->single_split = NULL; + mx->n_factors = 0; + mx->factors = NULL; + memset (mx->is_per_factor, 0, sizeof mx->is_per_factor); + mx->cells = -1; + mx->pop_n = -1; + mx->n_contents = 0; + mx->n_continuous = 0; + mx->first_continuous = 0; while (token != '.') { lex_match ('/'); @@ -221,23 +226,24 @@ cmd_matrix_data (void) new_var->p.mxd.subtype = i; } else - explicit_rowtype = 1; + mx->explicit_rowtype = 1; free (v[i]); } free (v); } { - rowtype_ = dict_create_var_assert (default_dict, "ROWTYPE_", 8); - rowtype_->p.mxd.vartype = MXD_ROWTYPE; - rowtype_->p.mxd.subtype = 0; + mx->rowtype_ = dict_create_var_assert (default_dict, + "ROWTYPE_", 8); + mx->rowtype_->p.mxd.vartype = MXD_ROWTYPE; + mx->rowtype_->p.mxd.subtype = 0; } } else if (lex_match_id ("FILE")) { lex_match ('='); - data_file = fh_parse_file_handle (); - if (!data_file) + mx->data_file = fh_parse_file_handle (); + if (mx->data_file == NULL) goto lossage; } else if (lex_match_id ("FORMAT")) @@ -247,19 +253,19 @@ cmd_matrix_data (void) while (token == T_ID) { if (lex_match_id ("LIST")) - fmt = LIST; + mx->fmt = LIST; else if (lex_match_id ("FREE")) - fmt = FREE; + mx->fmt = FREE; else if (lex_match_id ("LOWER")) - section = LOWER; + mx->section = LOWER; else if (lex_match_id ("UPPER")) - section = UPPER; + mx->section = UPPER; else if (lex_match_id ("FULL")) - section = FULL; + mx->section = FULL; else if (lex_match_id ("DIAGONAL")) - diag = DIAGONAL; + mx->diag = DIAGONAL; else if (lex_match_id ("NODIAGONAL")) - diag = NODIAGONAL; + mx->diag = NODIAGONAL; else { lex_error (_("in FORMAT subcommand")); @@ -294,12 +300,13 @@ cmd_matrix_data (void) goto lossage; } - single_split = dict_create_var_assert (default_dict, tokid, 0); + mx->single_split = dict_create_var_assert (default_dict, + tokid, 0); lex_get (); - single_split->p.mxd.vartype = MXD_CONTINUOUS; + mx->single_split->p.mxd.vartype = MXD_CONTINUOUS; - dict_set_split_vars (default_dict, &single_split, 1); + dict_set_split_vars (default_dict, &mx->single_split, 1); } else { @@ -341,22 +348,22 @@ cmd_matrix_data (void) } seen |= 4; - if (!parse_variables (default_dict, &factors, &n_factors, PV_NONE)) + if (!parse_variables (default_dict, &mx->factors, &mx->n_factors, PV_NONE)) goto lossage; { int i; - for (i = 0; i < n_factors; i++) + for (i = 0; i < mx->n_factors; i++) { - if (factors[i]->p.mxd.vartype != MXD_CONTINUOUS) + if (mx->factors[i]->p.mxd.vartype != MXD_CONTINUOUS) { msg (SE, _("Factor variable %s is already another type."), tokid); goto lossage; } - factors[i]->p.mxd.vartype = MXD_FACTOR; - factors[i]->p.mxd.subtype = i; + mx->factors[i]->p.mxd.vartype = MXD_FACTOR; + mx->factors[i]->p.mxd.subtype = i; } } } @@ -364,7 +371,7 @@ cmd_matrix_data (void) { lex_match ('='); - if (cells != -1) + if (mx->cells != -1) { msg (SE, _("CELLS subcommand multiply specified.")); goto lossage; @@ -376,14 +383,14 @@ cmd_matrix_data (void) goto lossage; } - cells = lex_integer (); + mx->cells = lex_integer (); lex_get (); } else if (lex_match_id ("N")) { lex_match ('='); - if (pop_n != -1) + if (mx->pop_n != -1) { msg (SE, _("N subcommand multiply specified.")); goto lossage; @@ -395,7 +402,7 @@ cmd_matrix_data (void) goto lossage; } - pop_n = lex_integer (); + mx->pop_n = lex_integer (); lex_get (); } else if (lex_match_id ("CONTENTS")) @@ -417,7 +424,7 @@ cmd_matrix_data (void) int i; for (i = 0; i <= PROX; i++) - is_per_factor[i] = 0; + mx->is_per_factor[i] = 0; } for (;;) @@ -439,7 +446,7 @@ cmd_matrix_data (void) msg (SE, _("Mismatched right parenthesis (`(').")); goto lossage; } - if (contents[n_contents - 1] == LPAREN) + if (mx->contents[mx->n_contents - 1] == LPAREN) { msg (SE, _("Empty parentheses not allowed.")); goto lossage; @@ -476,9 +483,9 @@ cmd_matrix_data (void) collide |= (1 << collide_index); item = content_type; - is_per_factor[item] = inside_parens; + mx->is_per_factor[item] = inside_parens; } - contents[n_contents++] = item; + mx->contents[mx->n_contents++] = item; if (token == '/' || token == '.') break; @@ -489,7 +496,7 @@ cmd_matrix_data (void) msg (SE, _("Missing right parenthesis.")); goto lossage; } - contents[n_contents] = EOC; + mx->contents[mx->n_contents] = EOC; } else { @@ -510,17 +517,17 @@ cmd_matrix_data (void) goto lossage; } - if (!n_contents && !explicit_rowtype) + if (!mx->n_contents && !mx->explicit_rowtype) { msg (SW, _("CONTENTS subcommand not specified: assuming file " "contains only CORR matrix.")); - contents[0] = CORR; - contents[1] = EOC; - n_contents = 0; + mx->contents[0] = CORR; + mx->contents[1] = EOC; + mx->n_contents = 0; } - if (n_factors && !explicit_rowtype && cells == -1) + if (mx->n_factors && !mx->explicit_rowtype && mx->cells == -1) { msg (SE, _("Missing CELLS subcommand. CELLS is required " "when ROWTYPE_ is not given in the data and " @@ -528,7 +535,7 @@ cmd_matrix_data (void) goto lossage; } - if (explicit_rowtype && single_split) + if (mx->explicit_rowtype && mx->single_split) { msg (SE, _("Split file values must be present in the data when " "ROWTYPE_ is present.")); @@ -537,9 +544,9 @@ cmd_matrix_data (void) /* Create VARNAME_. */ { - varname_ = dict_create_var_assert (default_dict, "VARNAME_", 8); - varname_->p.mxd.vartype = MXD_VARNAME; - varname_->p.mxd.subtype = 0; + mx->varname_ = dict_create_var_assert (default_dict, "VARNAME_", 8); + mx->varname_->p.mxd.vartype = MXD_VARNAME; + mx->varname_->p.mxd.subtype = 0; } /* Sort the dictionary variables into the desired order for the @@ -567,7 +574,7 @@ cmd_matrix_data (void) int i; - first_continuous = -1; + mx->first_continuous = -1; for (i = 0; i < dict_get_var_cnt (default_dict); i++) { struct variable *v = dict_get_var (default_dict, i); @@ -577,13 +584,13 @@ cmd_matrix_data (void) v->print = v->write = fmt_tab[type]; if (type == MXD_CONTINUOUS) - n_continuous++; - if (first_continuous == -1 && type == MXD_CONTINUOUS) - first_continuous = i; + mx->n_continuous++; + if (mx->first_continuous == -1 && type == MXD_CONTINUOUS) + mx->first_continuous = i; } } - if (n_continuous == 0) + if (mx->n_continuous == 0) { msg (SE, _("No continuous variables specified.")); goto lossage; @@ -593,19 +600,19 @@ cmd_matrix_data (void) debug_print (); #endif - if (explicit_rowtype) - read_matrices_with_rowtype (); + if (mx->explicit_rowtype) + read_matrices_with_rowtype (mx); else - read_matrices_without_rowtype (); + read_matrices_without_rowtype (mx); - pool_destroy (container); + pool_destroy (mx->container); return CMD_SUCCESS; lossage: discard_variables (); - free (factors); - pool_destroy (container); + free (mx->factors); + pool_destroy (mx->container); return CMD_FAILURE; } @@ -732,25 +739,25 @@ debug_print (void) if (cells != -1) printf ("\t/CELLS=%d\n", cells); - if (pop_n != -1) - printf ("\t/N=%d\n", pop_n); + if (mx->pop_n != -1) + printf ("\t/N=%d\n", mx->pop_n); - if (n_contents) + if (mx->n_contents) { int i; int space = 0; printf ("\t/CONTENTS="); - for (i = 0; i < n_contents; i++) + for (i = 0; i < mx->n_contents; i++) { - if (contents[i] == LPAREN) + if (mx->contents[i] == LPAREN) { if (space) printf (" "); printf ("("); space = 0; } - else if (contents[i] == RPAREN) + else if (mx->contents[i] == RPAREN) { printf (")"); space = 1; @@ -758,10 +765,10 @@ debug_print (void) else { - assert (contents[i] >= 0 && contents[i] <= PROX); + assert (mx->contents[i] >= 0 && mx->contents[i] <= PROX); if (space) printf (" "); - printf ("%s", content_names[contents[i]]); + printf ("%s", content_names[mx->contents[i]]); space = 1; } } @@ -773,66 +780,54 @@ debug_print (void) /* Matrix tokenizer. */ /* Matrix token types. */ -enum +enum matrix_token_type { - MNULL, /* No token. */ MNUM, /* Number. */ - MSTR, /* String. */ - MSTOP /* End of file. */ + MSTR /* String. */ }; -/* Current matrix token. */ -static int mtoken; - -/* Token string if applicable; not null-terminated. */ -static char *mtokstr; - -/* Length of mtokstr in characters. */ -static int mtoklen; - -/* Token value if applicable. */ -static double mtokval; +struct matrix_token + { + enum matrix_token_type type; + double number; /* MNUM: token value. */ + char *string; /* MSTR: token string; not null-terminated. */ + int length; /* MSTR: tokstr length. */ + }; -static int mget_token (void); +static int mget_token (struct matrix_token *, struct file_handle *); #if DEBUGGING -#define mget_token() mget_token_dump() - -static int -mget_token_dump (void) -{ - int result = (mget_token) (); - mdump_token (); - return result; -} +#define mget_token(TOKEN, HANDLE) mget_token_dump(TOKEN, HANDLE) static void -mdump_token (void) +mdump_token (const struct matrix_token *token) { - switch (mtoken) + switch (token->type) { - case MNULL: - printf (" "); - break; case MNUM: - printf (" #%g", mtokval); + printf (" #%g", token->number); break; case MSTR: - printf (" #'%.*s'", mtoklen, mtokstr); - break; - case MSTOP: - printf (" "); + printf (" '%.*s'", token->length, token->string); break; default: assert (0); } fflush (stdout); } + +static int +mget_token_dump (struct matrix_token *token, struct file_handle *data_file) +{ + int result = (mget_token) (token, data_file); + mdump_token (token); + return result; +} #endif -/* Return the current position in the data file. */ +/* Return the current position in DATA_FILE. */ static const char * -context (void) +context (struct file_handle *data_file) { static char buf[32]; int len; @@ -858,14 +853,11 @@ context (void) /* Is there at least one token left in the data file? */ static int -another_token (void) +another_token (struct file_handle *data_file) { char *cp, *ep; int len; - if (mtoken == MSTOP) - return 0; - for (;;) { cp = dfm_get_record (data_file, &len); @@ -887,9 +879,9 @@ another_token (void) return 1; } -/* Parse a MATRIX DATA token from data_file into mtok*. */ +/* Parse a MATRIX DATA token from mx->data_file into TOKEN. */ static int -(mget_token) (void) +(mget_token) (struct matrix_token *token, struct file_handle *data_file) { char *cp, *ep; int len; @@ -899,12 +891,7 @@ static int { cp = dfm_get_record (data_file, &len); if (!cp) - { - if (mtoken == MSTOP) - return 0; - mtoken = MSTOP; - return 1; - } + return 0; ep = cp + len; while (isspace ((unsigned char) *cp) && cp < ep) @@ -924,11 +911,11 @@ static int { int quote = *cp; - mtoken = MSTR; - mtokstr = ++cp; + token->type = MSTR; + token->string = ++cp; while (cp < ep && *cp != quote) cp++; - mtoklen = cp - mtokstr; + token->length = cp - token->string; if (cp < ep) cp++; else @@ -938,7 +925,7 @@ static int { int is_num = isdigit ((unsigned char) *cp) || *cp == '.'; - mtokstr = cp++; + token->string = cp++; while (cp < ep && !isspace ((unsigned char) *cp) && *cp != ',' && *cp != '-' && *cp != '+') { @@ -953,26 +940,26 @@ static int cp++; } - mtoklen = cp - mtokstr; - assert (mtoklen); + token->length = cp - token->string; + assert (token->length); if (is_num) { struct data_in di; - di.s = mtokstr; - di.e = mtokstr + mtoklen; - di.v = (union value *) &mtokval; + di.s = token->string; + di.e = token->string + token->length; + di.v = (union value *) &token->number; di.f1 = first_column; di.format.type = FMT_F; - di.format.w = mtoklen; + di.format.w = token->length; di.format.d = 0; if (!data_in (&di)) return 0; } else - mtoken = MSTR; + token->type = MSTR; } dfm_set_record (data_file, cp); @@ -981,16 +968,13 @@ static int } /* Forcibly skip the end of a line for content type CONTENT in - data_file. */ + DATA_FILE. */ static int -force_eol (const char *content) +force_eol (struct file_handle *data_file, const char *content) { char *cp; int len; - if (fmt == FREE) - return 1; - cp = dfm_get_record (data_file, &len); if (!cp) return 0; @@ -1000,7 +984,7 @@ force_eol (const char *content) if (len) { msg (SE, _("End of line expected %s while reading %s."), - context (), content); + context (data_file), content); return 0; } @@ -1023,73 +1007,74 @@ static int max_cell_index; /* SPLIT FILE variable values. */ static double *split_values; -static int nr_read_splits (int compare); -static int nr_read_factors (int cell); -static void nr_output_data (write_case_func *, write_case_data); -static void matrix_data_read_without_rowtype (write_case_func *, +static int nr_read_splits (struct matrix_data_pgm *, int compare); +static int nr_read_factors (struct matrix_data_pgm *, int cell); +static void nr_output_data (struct matrix_data_pgm *, + write_case_func *, write_case_data); +static void matrix_data_read_without_rowtype (struct case_source *source, + write_case_func *, write_case_data); /* Read from the data file and write it to the active file. */ static void -read_matrices_without_rowtype (void) +read_matrices_without_rowtype (struct matrix_data_pgm *mx) { - if (cells == -1) - cells = 1; + if (mx->cells == -1) + mx->cells = 1; - mtoken = MNULL; split_values = xmalloc (sizeof *split_values * dict_get_split_cnt (default_dict)); - nr_factor_values = xmalloc (sizeof *nr_factor_values * n_factors * cells); + nr_factor_values = xmalloc (sizeof *nr_factor_values * mx->n_factors * mx->cells); max_cell_index = 0; - matrix_data_source.read = matrix_data_read_without_rowtype; - vfm_source = &matrix_data_source; + vfm_source = create_case_source (&matrix_data_without_rowtype_source_class, + mx); procedure (NULL, NULL, NULL, NULL); free (split_values); free (nr_factor_values); - fh_close_handle (data_file); + fh_close_handle (mx->data_file); } /* Mirror data across the diagonal of matrix CP which contains CONTENT type data. */ static void -fill_matrix (int content, double *cp) +fill_matrix (struct matrix_data_pgm *mx, int content, double *cp) { int type = content_type[content]; - if (type == 1 && section != FULL) + if (type == 1 && mx->section != FULL) { - if (diag == NODIAGONAL) + if (mx->diag == NODIAGONAL) { const double fill = content == CORR ? 1.0 : SYSMIS; int i; - for (i = 0; i < n_continuous; i++) - cp[i * (1 + n_continuous)] = fill; + for (i = 0; i < mx->n_continuous; i++) + cp[i * (1 + mx->n_continuous)] = fill; } { int c, r; - if (section == LOWER) + if (mx->section == LOWER) { - int n_lines = n_continuous; - if (section != FULL && diag == NODIAGONAL) + int n_lines = mx->n_continuous; + if (mx->section != FULL && mx->diag == NODIAGONAL) n_lines--; for (r = 1; r < n_lines; r++) for (c = 0; c < r; c++) - cp[r + c * n_continuous] = cp[c + r * n_continuous]; + cp[r + c * mx->n_continuous] = cp[c + r * mx->n_continuous]; } else { - assert (section == UPPER); - for (r = 1; r < n_continuous; r++) + assert (mx->section == UPPER); + for (r = 1; r < mx->n_continuous; r++) for (c = 0; c < r; c++) - cp[c + r * n_continuous] = cp[r + c * n_continuous]; + cp[c + r * mx->n_continuous] = cp[r + c * mx->n_continuous]; } } } @@ -1097,16 +1082,17 @@ fill_matrix (int content, double *cp) { int c; - for (c = 1; c < n_continuous; c++) + for (c = 1; c < mx->n_continuous; c++) cp[c] = cp[0]; } } -/* Read data lines for content type CONTENT from the data file. If - PER_FACTOR is nonzero, then factor information is read from the - data file. Data is for cell number CELL. */ +/* Read data lines for content type CONTENT from the data file. + If PER_FACTOR is nonzero, then factor information is read from + the data file. Data is for cell number CELL. */ static int -nr_read_data_lines (int per_factor, int cell, int content, int compare) +nr_read_data_lines (struct matrix_data_pgm *mx, + int per_factor, int cell, int content, int compare) { /* Content type. */ const int type = content_type[content]; @@ -1125,47 +1111,47 @@ nr_read_data_lines (int per_factor, int cell, int content, int compare) n_lines = 1; else { - n_lines = n_continuous; - if (section != FULL && diag == NODIAGONAL) + n_lines = mx->n_continuous; + if (mx->section != FULL && mx->diag == NODIAGONAL) n_lines--; } cp = nr_data[content][cell]; - if (type == 1 && section == LOWER && diag == NODIAGONAL) - cp += n_continuous; + if (type == 1 && mx->section == LOWER && mx->diag == NODIAGONAL) + cp += mx->n_continuous; for (i = 0; i < n_lines; i++) { int n_cols; - if (!nr_read_splits (1)) + if (!nr_read_splits (mx, 1)) return 0; - if (per_factor && !nr_read_factors (cell)) + if (per_factor && !nr_read_factors (mx, cell)) return 0; compare = 1; switch (type) { case 0: - n_cols = n_continuous; + n_cols = mx->n_continuous; break; case 1: - switch (section) + switch (mx->section) { case LOWER: n_cols = i + 1; break; case UPPER: cp += i; - n_cols = n_continuous - i; - if (diag == NODIAGONAL) + n_cols = mx->n_continuous - i; + if (mx->diag == NODIAGONAL) { n_cols--; cp++; } break; case FULL: - n_cols = n_continuous; + n_cols = mx->n_continuous; break; default: assert (0); @@ -1183,27 +1169,30 @@ nr_read_data_lines (int per_factor, int cell, int content, int compare) for (j = 0; j < n_cols; j++) { - if (!mget_token ()) + struct matrix_token token; + if (!mget_token (&token, mx->data_file)) return 0; - if (mtoken != MNUM) + if (token.type != MNUM) { msg (SE, _("expecting value for %s %s"), - dict_get_var (default_dict, j)->name, context ()); + dict_get_var (default_dict, j)->name, + context (mx->data_file)); return 0; } - *cp++ = mtokval; + *cp++ = token.number; } - if (!force_eol (content_names[content])) + if (mx->fmt != FREE + && !force_eol (mx->data_file, content_names[content])) return 0; debug_printf (("\n")); } - if (section == LOWER) - cp += n_continuous - n_cols; + if (mx->section == LOWER) + cp += mx->n_continuous - n_cols; } - fill_matrix (content, nr_data[content][cell]); + fill_matrix (mx, content, nr_data[content][cell]); return 1; } @@ -1211,13 +1200,16 @@ nr_read_data_lines (int per_factor, int cell, int content, int compare) /* When ROWTYPE_ does not appear in the data, reads the matrices and writes them to the output file. Returns success. */ static void -matrix_data_read_without_rowtype (write_case_func *write_case, +matrix_data_read_without_rowtype (struct case_source *source, + write_case_func *write_case, write_case_data wc_data) { + struct matrix_data_pgm *mx = source->aux; + { int *cp; - nr_data = pool_alloc (container, (PROX + 1) * sizeof *nr_data); + nr_data = pool_alloc (mx->container, (PROX + 1) * sizeof *nr_data); { int i; @@ -1226,25 +1218,25 @@ matrix_data_read_without_rowtype (write_case_func *write_case, nr_data[i] = NULL; } - for (cp = contents; *cp != EOC; cp++) + for (cp = mx->contents; *cp != EOC; cp++) if (*cp != LPAREN && *cp != RPAREN) { - int per_factor = is_per_factor[*cp]; + int per_factor = mx->is_per_factor[*cp]; int n_entries; - n_entries = n_continuous; + n_entries = mx->n_continuous; if (content_type[*cp] == 1) - n_entries *= n_continuous; + n_entries *= mx->n_continuous; { - int n_vectors = per_factor ? cells : 1; + int n_vectors = per_factor ? mx->cells : 1; int i; - nr_data[*cp] = pool_alloc (container, + nr_data[*cp] = pool_alloc (mx->container, n_vectors * sizeof **nr_data); for (i = 0; i < n_vectors; i++) - nr_data[*cp][i] = pool_alloc (container, + nr_data[*cp][i] = pool_alloc (mx->container, n_entries * sizeof ***nr_data); } } @@ -1254,10 +1246,10 @@ matrix_data_read_without_rowtype (write_case_func *write_case, { int *bp, *ep, *np; - if (!nr_read_splits (0)) + if (!nr_read_splits (mx, 0)) return; - for (bp = contents; *bp != EOC; bp = np) + for (bp = mx->contents; *bp != EOC; bp = np) { int per_factor; @@ -1284,20 +1276,21 @@ matrix_data_read_without_rowtype (write_case_func *write_case, { int i; - for (i = 0; i < (per_factor ? cells : 1); i++) + for (i = 0; i < (per_factor ? mx->cells : 1); i++) { int *cp; for (cp = bp; cp < ep; cp++) - if (!nr_read_data_lines (per_factor, i, *cp, cp != bp)) + if (!nr_read_data_lines (mx, per_factor, i, *cp, cp != bp)) return; } } } - nr_output_data (write_case, wc_data); + nr_output_data (mx, write_case, wc_data); - if (dict_get_split_cnt (default_dict) == 0 || !another_token ()) + if (dict_get_split_cnt (default_dict) == 0 + || !another_token (mx->data_file)) return; } } @@ -1306,7 +1299,7 @@ matrix_data_read_without_rowtype (write_case_func *write_case, values read to the last values read and returns 1 if they're equal, 0 otherwise. */ static int -nr_read_splits (int compare) +nr_read_splits (struct matrix_data_pgm *mx, int compare) { static int just_read = 0; size_t split_cnt; @@ -1321,7 +1314,7 @@ nr_read_splits (int compare) if (dict_get_split_vars (default_dict) == NULL) return 1; - if (single_split) + if (mx->single_split) { if (!compare) split_values[0] @@ -1335,18 +1328,19 @@ nr_read_splits (int compare) split_cnt = dict_get_split_cnt (default_dict); for (i = 0; i < split_cnt; i++) { - if (!mget_token ()) + struct matrix_token token; + if (!mget_token (&token, mx->data_file)) return 0; - if (mtoken != MNUM) + if (token.type != MNUM) { msg (SE, _("Syntax error expecting SPLIT FILE value %s."), - context ()); + context (mx->data_file)); return 0; } if (!compare) - split_values[i] = mtokval; - else if (split_values[i] != mtokval) + split_values[i] = token.number; + else if (split_values[i] != token.number) { msg (SE, _("Expecting value %g for %s."), split_values[i], dict_get_split_vars (default_dict)[i]->name); @@ -1361,11 +1355,11 @@ nr_read_splits (int compare) values read to the last values read and returns 1 if they're equal, 0 otherwise. */ static int -nr_read_factors (int cell) +nr_read_factors (struct matrix_data_pgm *mx, int cell) { int compare; - if (n_factors == 0) + if (mx->n_factors == 0) return 1; assert (max_cell_index >= cell); @@ -1380,24 +1374,25 @@ nr_read_factors (int cell) { int i; - for (i = 0; i < n_factors; i++) + for (i = 0; i < mx->n_factors; i++) { - if (!mget_token ()) + struct matrix_token token; + if (!mget_token (&token, mx->data_file)) return 0; - if (mtoken != MNUM) + if (token.type != MNUM) { msg (SE, _("Syntax error expecting factor value %s."), - context ()); + context (mx->data_file)); return 0; } if (!compare) - nr_factor_values[i + n_factors * cell] = mtokval; - else if (nr_factor_values[i + n_factors * cell] != mtokval) + nr_factor_values[i + mx->n_factors * cell] = token.number; + else if (nr_factor_values[i + mx->n_factors * cell] != token.number) { msg (SE, _("Syntax error expecting value %g for %s %s."), - nr_factor_values[i + n_factors * cell], - factors[i]->name, context ()); + nr_factor_values[i + mx->n_factors * cell], + mx->factors[i]->name, context (mx->data_file)); return 0; } } @@ -1409,37 +1404,37 @@ nr_read_factors (int cell) /* Write the contents of a cell having content type CONTENT and data CP to the active file. */ static void -dump_cell_content (int content, double *cp, +dump_cell_content (struct matrix_data_pgm *mx, int content, double *cp, write_case_func *write_case, write_case_data wc_data) { int type = content_type[content]; { - st_bare_pad_copy (temp_case->data[rowtype_->fv].s, + st_bare_pad_copy (temp_case->data[mx->rowtype_->fv].s, content_names[content], 8); if (type != 1) - memset (&temp_case->data[varname_->fv].s, ' ', 8); + memset (&temp_case->data[mx->varname_->fv].s, ' ', 8); } { - int n_lines = (type == 1) ? n_continuous : 1; + int n_lines = (type == 1) ? mx->n_continuous : 1; int i; for (i = 0; i < n_lines; i++) { int j; - for (j = 0; j < n_continuous; j++) + for (j = 0; j < mx->n_continuous; j++) { - int fv = dict_get_var (default_dict, first_continuous + j)->fv; + int fv = dict_get_var (default_dict, mx->first_continuous + j)->fv; temp_case->data[fv].f = *cp; cp++; } if (type == 1) - st_bare_pad_copy (temp_case->data[varname_->fv].s, + st_bare_pad_copy (temp_case->data[mx->varname_->fv].s, dict_get_var (default_dict, - first_continuous + i)->name, + mx->first_continuous + i)->name, 8); write_case (wc_data); } @@ -1448,7 +1443,8 @@ dump_cell_content (int content, double *cp, /* Finally dump out everything from nr_data[] to the output file. */ static void -nr_output_data (write_case_func *write_case, write_case_data wc_data) +nr_output_data (struct matrix_data_pgm *mx, + write_case_func *write_case, write_case_data wc_data) { { struct variable *const *split; @@ -1460,20 +1456,20 @@ nr_output_data (write_case_func *write_case, write_case_data wc_data) temp_case->data[split[i]->fv].f = split_values[i]; } - if (n_factors) + if (mx->n_factors) { int cell; - for (cell = 0; cell < cells; cell++) + for (cell = 0; cell < mx->cells; cell++) { { int factor; - for (factor = 0; factor < n_factors; factor++) + for (factor = 0; factor < mx->n_factors; factor++) { - temp_case->data[factors[factor]->fv].f - = nr_factor_values[factor + cell * n_factors]; - debug_printf (("f:%s ", factors[factor]->name)); + temp_case->data[mx->factors[factor]->fv].f + = nr_factor_values[factor + cell * mx->n_factors]; + debug_printf (("f:%s ", mx->factors[factor]->name)); } } @@ -1481,12 +1477,12 @@ nr_output_data (write_case_func *write_case, write_case_data wc_data) int content; for (content = 0; content <= PROX; content++) - if (is_per_factor[content]) + if (mx->is_per_factor[content]) { assert (nr_data[content] != NULL && nr_data[content][cell] != NULL); - dump_cell_content (content, nr_data[content][cell], + dump_cell_content (mx, content, nr_data[content][cell], write_case, wc_data); } } @@ -1499,13 +1495,13 @@ nr_output_data (write_case_func *write_case, write_case_data wc_data) { int factor; - for (factor = 0; factor < n_factors; factor++) - temp_case->data[factors[factor]->fv].f = SYSMIS; + for (factor = 0; factor < mx->n_factors; factor++) + temp_case->data[mx->factors[factor]->fv].f = SYSMIS; } for (content = 0; content <= PROX; content++) - if (!is_per_factor[content] && nr_data[content] != NULL) - dump_cell_content (content, nr_data[content][0], + if (!mx->is_per_factor[content] && nr_data[content] != NULL) + dump_cell_content (mx, content, nr_data[content][0], write_case, wc_data); } } @@ -1530,58 +1526,62 @@ struct factor_data *wr_data; /* Current factor. */ struct factor_data *wr_current; -static int wr_read_splits (write_case_func *, write_case_data); -static int wr_output_data (write_case_func *, write_case_data); -static int wr_read_rowtype (void); -static int wr_read_factors (void); -static int wr_read_indeps (void); -static void matrix_data_read_with_rowtype (write_case_func *, +static int wr_read_splits (struct matrix_data_pgm *, + write_case_func *, write_case_data); +static int wr_output_data (struct matrix_data_pgm *, write_case_func *, write_case_data); +static int wr_read_rowtype (const struct matrix_token *, struct file_handle *); +static int wr_read_factors (struct matrix_data_pgm *); +static int wr_read_indeps (struct matrix_data_pgm *); +static void matrix_data_read_with_rowtype (struct case_source *, + write_case_func *, write_case_data); /* When ROWTYPE_ appears in the data, reads the matrices and writes them to the output file. */ static void -read_matrices_with_rowtype (void) +read_matrices_with_rowtype (struct matrix_data_pgm *mx) { - mtoken = MNULL; wr_data = wr_current = NULL; split_values = NULL; - cells = 0; + mx->cells = 0; - matrix_data_source.read = matrix_data_read_with_rowtype; - vfm_source = &matrix_data_source; + vfm_source = create_case_source (&matrix_data_with_rowtype_source_class, mx); procedure (NULL, NULL, NULL, NULL); free (split_values); - fh_close_handle (data_file); + fh_close_handle (mx->data_file); } /* Read from the data file and write it to the active file. */ static void -matrix_data_read_with_rowtype (write_case_func *write_case, +matrix_data_read_with_rowtype (struct case_source *source, + write_case_func *write_case, write_case_data wc_data) { + struct matrix_data_pgm *mx = source->aux; + do { - if (!wr_read_splits (write_case, wc_data)) + if (!wr_read_splits (mx, write_case, wc_data)) return; - if (!wr_read_factors ()) + if (!wr_read_factors (mx)) return; - if (!wr_read_indeps ()) + if (!wr_read_indeps (mx)) return; } - while (another_token ()); + while (another_token (mx->data_file)); - wr_output_data (write_case, wc_data); + wr_output_data (mx, write_case, wc_data); } /* Read the split file variables. If they differ from the previous set of split variables then output the data. Returns success. */ static int -wr_read_splits (write_case_func *write_case, write_case_data wc_data) +wr_read_splits (struct matrix_data_pgm *mx, + write_case_func *write_case, write_case_data wc_data) { int compare; size_t split_cnt; @@ -1605,23 +1605,24 @@ wr_read_splits (write_case_func *write_case, write_case_data wc_data) for (i = 0; i < split_cnt; i++) { - if (!mget_token ()) + struct matrix_token token; + if (!mget_token (&token, mx->data_file)) return 0; - if (mtoken != MNUM) + if (token.type != MNUM) { msg (SE, _("Syntax error %s expecting SPLIT FILE value."), - context ()); + context (mx->data_file)); return 0; } - if (compare && split_values[i] != mtokval && !different) + if (compare && split_values[i] != token.number && !different) { - if (!wr_output_data (write_case, wc_data)) + if (!wr_output_data (mx, write_case, wc_data)) return 0; different = 1; - cells = 0; + mx->cells = 0; } - split_values[i] = mtokval; + split_values[i] = token.number; } } @@ -1647,26 +1648,28 @@ compare_doubles (const void *a_, const void *b_, void *aux UNUSED) return -1; } -/* Return strcmp()-type comparison of the n_factors factors at _A and +/* Return strcmp()-type comparison of the MX->n_factors factors at _A and _B. Sort missing values toward the end. */ static int -compare_factors (const void *a_, const void *b_) +compare_factors (const void *a_, const void *b_, void *mx_) { + struct matrix_data_pgm *mx = mx_; struct factor_data *const *pa = a_; struct factor_data *const *pb = b_; const double *a = (*pa)->factors; const double *b = (*pb)->factors; - return lexicographical_compare (a, n_factors, - b, n_factors, - sizeof *a, - compare_doubles, NULL); + return lexicographical_compare_3way (a, mx->n_factors, + b, mx->n_factors, + sizeof *a, + compare_doubles, NULL); } /* Write out the data for the current split file to the active file. */ static int -wr_output_data (write_case_func *write_case, write_case_data wc_data) +wr_output_data (struct matrix_data_pgm *mx, + write_case_func *write_case, write_case_data wc_data) { { struct variable *const *split; @@ -1684,17 +1687,17 @@ wr_output_data (write_case_func *write_case, write_case_data wc_data) struct factor_data *iter; int i; - factors = xmalloc (sizeof *factors * cells); + factors = xmalloc (sizeof *factors * mx->cells); for (i = 0, iter = wr_data; iter; iter = iter->next, i++) factors[i] = iter; - qsort (factors, cells, sizeof *factors, compare_factors); + sort (factors, mx->cells, sizeof *factors, compare_factors, mx); wr_data = factors[0]; - for (i = 0; i < cells - 1; i++) + for (i = 0; i < mx->cells - 1; i++) factors[i]->next = factors[i + 1]; - factors[cells - 1]->next = NULL; + factors[mx->cells - 1]->next = NULL; free (factors); } @@ -1708,9 +1711,9 @@ wr_output_data (write_case_func *write_case, write_case_data wc_data) { int factor; - for (factor = 0; factor < n_factors; factor++) + for (factor = 0; factor < mx->n_factors; factor++) { - temp_case->data[factors[factor]->fv].f + temp_case->data[mx->factors[factor]->fv].f = iter->factors[factor]; debug_printf (("f:%s ", factors[factor]->name)); } @@ -1727,8 +1730,8 @@ wr_output_data (write_case_func *write_case, write_case_data wc_data) { int type = content_type[content]; int n_lines = (type == 1 - ? (n_continuous - - (section != FULL && diag == NODIAGONAL)) + ? (mx->n_continuous + - (mx->section != FULL && mx->diag == NODIAGONAL)) : 1); if (n_lines != iter->n_rows[content]) @@ -1742,35 +1745,38 @@ wr_output_data (write_case_func *write_case, write_case_data wc_data) } } - fill_matrix (content, iter->data[content]); + fill_matrix (mx, content, iter->data[content]); - dump_cell_content (content, iter->data[content], + dump_cell_content (mx, content, iter->data[content], write_case, wc_data); } } } } - pool_destroy (container); - container = pool_create (); + pool_destroy (mx->container); + mx->container = pool_create (); wr_data = wr_current = NULL; return 1; } -/* Read ROWTYPE_ from the data file. Return success. */ +/* Sets ROWTYPE_ based on the given TOKEN read from DATA_FILE. + Return success. */ static int -wr_read_rowtype (void) +wr_read_rowtype (const struct matrix_token *token, + struct file_handle *data_file) { if (wr_content != -1) { - msg (SE, _("Multiply specified ROWTYPE_ %s."), context ()); + msg (SE, _("Multiply specified ROWTYPE_ %s."), context (data_file)); return 0; } - if (mtoken != MSTR) + if (token->type != MSTR) { - msg (SE, _("Syntax error %s expecting ROWTYPE_ string."), context ()); + msg (SE, _("Syntax error %s expecting ROWTYPE_ string."), + context (data_file)); return 0; } @@ -1778,8 +1784,8 @@ wr_read_rowtype (void) char s[16]; char *cp; - memcpy (s, mtokstr, min (15, mtoklen)); - s[min (15, mtoklen)] = 0; + memcpy (s, token->string, min (15, token->length)); + s[min (15, token->length)] = 0; for (cp = s; *cp; cp++) *cp = toupper ((unsigned char) *cp); @@ -1789,7 +1795,7 @@ wr_read_rowtype (void) if (wr_content == -1) { - msg (SE, _("Syntax error %s."), context ()); + msg (SE, _("Syntax error %s."), context (data_file)); return 0; } @@ -1799,40 +1805,42 @@ wr_read_rowtype (void) /* Read the factors for the current row. Select a set of factors and point wr_current to it. */ static int -wr_read_factors (void) +wr_read_factors (struct matrix_data_pgm *mx) { - double *factor_values = local_alloc (sizeof *factor_values * n_factors); + double *factor_values = local_alloc (sizeof *factor_values * mx->n_factors); wr_content = -1; { int i; - for (i = 0; i < n_factors; i++) + for (i = 0; i < mx->n_factors; i++) { - if (!mget_token ()) + struct matrix_token token; + if (!mget_token (&token, mx->data_file)) goto lossage; - if (mtoken == MSTR) + if (token.type == MSTR) { - if (!wr_read_rowtype ()) + if (!wr_read_rowtype (&token, mx->data_file)) goto lossage; - if (!mget_token ()) + if (!mget_token (&token, mx->data_file)) goto lossage; } - if (mtoken != MNUM) + if (token.type != MNUM) { msg (SE, _("Syntax error expecting factor value %s."), - context ()); + context (mx->data_file)); goto lossage; } - factor_values[i] = mtokval; + factor_values[i] = token.number; } } if (wr_content == -1) { - if (!mget_token ()) + struct matrix_token token; + if (!mget_token (&token, mx->data_file)) goto lossage; - if (!wr_read_rowtype ()) + if (!wr_read_rowtype (&token, mx->data_file)) goto lossage; } @@ -1842,7 +1850,7 @@ wr_read_factors (void) { int i; - for (i = 0; i < n_factors; i++) + for (i = 0; i < mx->n_factors; i++) if (factor_values[i] != wr_current->factors[i]) goto cache_miss; goto winnage; @@ -1857,7 +1865,7 @@ cache_miss: { int i; - for (i = 0; i < n_factors; i++) + for (i = 0; i < mx->n_factors; i++) if (factor_values[i] != iter->factors[i]) goto next_item; @@ -1870,14 +1878,14 @@ cache_miss: /* Not found. Make a new item. */ { - struct factor_data *new = pool_alloc (container, sizeof *new); + struct factor_data *new = pool_alloc (mx->container, sizeof *new); - new->factors = pool_alloc (container, sizeof *new->factors * n_factors); + new->factors = pool_alloc (mx->container, sizeof *new->factors * mx->n_factors); { int i; - for (i = 0; i < n_factors; i++) + for (i = 0; i < mx->n_factors; i++) new->factors[i] = factor_values[i]; } @@ -1893,7 +1901,7 @@ cache_miss: new->next = wr_data; wr_data = wr_current = new; - cells++; + mx->cells++; } winnage: @@ -1907,7 +1915,7 @@ lossage: /* Read the independent variables into wr_current. */ static int -wr_read_indeps (void) +wr_read_indeps (struct matrix_data_pgm *mx) { struct factor_data *c = wr_current; const int type = content_type[wr_content]; @@ -1918,15 +1926,15 @@ wr_read_indeps (void) /* Allocate room for data if necessary. */ if (c->data[wr_content] == NULL) { - int n_items = n_continuous; + int n_items = mx->n_continuous; if (type == 1) - n_items *= n_continuous; + n_items *= mx->n_continuous; - c->data[wr_content] = pool_alloc (container, + c->data[wr_content] = pool_alloc (mx->container, sizeof **c->data * n_items); } - cp = &c->data[wr_content][n_rows * n_continuous]; + cp = &c->data[wr_content][n_rows * mx->n_continuous]; /* Figure out how much to read from this line. */ switch (type) @@ -1940,36 +1948,36 @@ wr_read_indeps (void) return 0; } if (type == 0) - n_cols = n_continuous; + n_cols = mx->n_continuous; else n_cols = 1; break; case 1: - if (n_rows >= n_continuous - (section != FULL && diag == NODIAGONAL)) + if (n_rows >= mx->n_continuous - (mx->section != FULL && mx->diag == NODIAGONAL)) { msg (SE, _("Too many rows of matrix data for %s."), content_names[wr_content]); return 0; } - switch (section) + switch (mx->section) { case LOWER: n_cols = n_rows + 1; - if (diag == NODIAGONAL) - cp += n_continuous; + if (mx->diag == NODIAGONAL) + cp += mx->n_continuous; break; case UPPER: cp += n_rows; - n_cols = n_continuous - n_rows; - if (diag == NODIAGONAL) + n_cols = mx->n_continuous - n_rows; + if (mx->diag == NODIAGONAL) { n_cols--; cp++; } break; case FULL: - n_cols = n_continuous; + n_cols = mx->n_continuous; break; default: assert (0); @@ -1988,19 +1996,21 @@ wr_read_indeps (void) for (j = 0; j < n_cols; j++) { - if (!mget_token ()) + struct matrix_token token; + if (!mget_token (&token, mx->data_file)) return 0; - if (mtoken != MNUM) + if (token.type != MNUM) { msg (SE, _("Syntax error expecting value for %s %s."), - dict_get_var (default_dict, first_continuous + j)->name, - context ()); + dict_get_var (default_dict, mx->first_continuous + j)->name, + context (mx->data_file)); return 0; } - *cp++ = mtokval; + *cp++ = token.number; } - if (!force_eol (content_names[wr_content])) + if (mx->fmt != FREE + && !force_eol (mx->data_file, content_names[wr_content])) return 0; debug_printf (("\n")); } @@ -2010,14 +2020,17 @@ wr_read_indeps (void) /* Matrix source. */ -struct case_stream matrix_data_source = +static const struct case_source_class matrix_data_with_rowtype_source_class = { - NULL, - NULL, - NULL, - NULL, - NULL, - NULL, "MATRIX DATA", + matrix_data_read_with_rowtype, + NULL, }; +static const struct case_source_class +matrix_data_without_rowtype_source_class = + { + "MATRIX DATA", + matrix_data_read_without_rowtype, + NULL, + }; diff --git a/src/set.q b/src/set.q index 5d9f1490..2d90ac7e 100644 --- a/src/set.q +++ b/src/set.q @@ -119,6 +119,7 @@ int set_testing_mode; int set_undefined; int set_viewlength; int set_viewwidth; +size_t set_max_workspace = 4L * 1024 * 1024; static void set_routing (int q, int *setting); static int set_ccx (const char *cc_string, struct set_cust_currency * cc, @@ -316,8 +317,18 @@ cmd_set (void) msg (SW, _("%s is not yet implemented."),"TB1"); if (cmd.undef != -1) set_undefined = cmd.undef == STC_NOWARN ? 0 : 1; - if (cmd.n_workspace != NOT_LONG) - msg (SE, _("%s is obsolete."),"WORKSPACE"); + if (cmd.n_workspace != NOT_LONG) + { + if (cmd.n_workspace < 1024) + msg (SE, _("Workspace limit must be at least 1 MB.")); + else + { + if (cmd.n_workspace > (size_t) -1 / 1024) + set_max_workspace = -1; + else + set_max_workspace = 1024 * cmd.n_workspace; + } + } /* PC+ compatible syntax. */ if (cmd.scrn != -1) diff --git a/src/settings.h b/src/settings.h index 3a8eaede..30256ae3 100644 --- a/src/settings.h +++ b/src/settings.h @@ -98,6 +98,7 @@ */ +#include #include /* The value that blank numeric fields are set to when read in; @@ -250,4 +251,8 @@ extern int set_viewlength; /* Screen width. */ extern int set_viewwidth; +/* Approximate maximum amount of memory to use for cases, in + bytes. */ +extern size_t set_max_workspace; + #endif /* !settings_h */ diff --git a/src/sort.c b/src/sort.c index 7949ea5e..399823ef 100644 --- a/src/sort.c +++ b/src/sort.c @@ -23,13 +23,14 @@ #include #include #include +#include "algorithm.h" #include "alloc.h" #include "command.h" #include "error.h" #include "expr.h" -#include "heap.h" #include "lexer.h" #include "misc.h" +#include "settings.h" #include "str.h" #include "var.h" #include "vfm.h" @@ -49,1331 +50,1301 @@ #include "debug-print.h" -/* Variables to sort. */ -struct variable **v_sort; -int nv_sort; - -/* Used when internal-sorting to a separate file. */ -static struct case_list **separate_case_tab; - /* Other prototypes. */ -static int compare_case_lists (const void *, const void *); -static int do_internal_sort (int separate); -static int do_external_sort (int separate); -int parse_sort_variables (void); -void read_sort_output (write_case_func *write_case, write_case_data wc_data); +static int compare_record (const union value *, const union value *, + const struct sort_cases_pgm *); +static int compare_case_lists (const void *, const void *, void *); +static struct internal_sort *do_internal_sort (struct sort_cases_pgm *, + int separate); +static void destroy_internal_sort (struct internal_sort *); +static struct external_sort *do_external_sort (struct sort_cases_pgm *, + int separate); +static void destroy_external_sort (struct external_sort *); +struct sort_cases_pgm *parse_sort (void); /* Performs the SORT CASES procedures. */ int cmd_sort_cases (void) { - /* First, just parse the command. */ + struct sort_cases_pgm *scp; + int success; + lex_match_id ("SORT"); lex_match_id ("CASES"); lex_match (T_BY); - if (!parse_sort_variables ()) + scp = parse_sort (); + if (scp == NULL) return CMD_FAILURE; cancel_temporary (); - /* Then it's time to do the actual work. There are two cases: - - (internal sort) All the data is in memory. In this case, we - perform an EXECUTE to get the data into the desired form, then - sort the cases in place, if it is still all in memory. - - (external sort) The data is not in memory. It may be coming from - a system file or other data file, etc. In any case, it is now - time to perform an external sort. This is better explained in - do_external_sort(). */ - - /* Do all this dirty work. */ - { - int success = sort_cases (0); - free (v_sort); - if (success) - return lex_end_of_command (); - else - return CMD_FAILURE; - } + success = sort_cases (scp, 0); + destroy_sort_cases_pgm (scp); + if (success) + return lex_end_of_command (); + else + return CMD_FAILURE; } -/* Parses a list of sort variables into v_sort and nv_sort. */ -int -parse_sort_variables (void) +/* Parses a list of sort keys and returns a struct sort_cases_pgm + based on it. Returns a null pointer on error. */ +struct sort_cases_pgm * +parse_sort (void) { - v_sort = NULL; - nv_sort = 0; + struct sort_cases_pgm *scp; + + scp = xmalloc (sizeof *scp); + scp->ref_cnt = 1; + scp->vars = NULL; + scp->dirs = NULL; + scp->var_cnt = 0; + scp->isrt = NULL; + scp->xsrt = NULL; + do { - int prev_nv_sort = nv_sort; - int order = SRT_ASCEND; + int prev_var_cnt = scp->var_cnt; + enum sort_direction direction = SRT_ASCEND; - if (!parse_variables (default_dict, &v_sort, &nv_sort, + /* Variables. */ + if (!parse_variables (default_dict, &scp->vars, &scp->var_cnt, PV_NO_DUPLICATE | PV_APPEND | PV_NO_SCRATCH)) - return 0; + goto error; + + /* Sort direction. */ if (lex_match ('(')) { if (lex_match_id ("D") || lex_match_id ("DOWN")) - order = SRT_DESCEND; + direction = SRT_DESCEND; else if (!lex_match_id ("A") && !lex_match_id ("UP")) { - free (v_sort); msg (SE, _("`A' or `D' expected inside parentheses.")); - return 0; + goto error; } if (!lex_match (')')) { - free (v_sort); msg (SE, _("`)' expected.")); - return 0; + goto error; } } - for (; prev_nv_sort < nv_sort; prev_nv_sort++) - v_sort[prev_nv_sort]->p.srt.order = order; + scp->dirs = xrealloc (scp->dirs, sizeof *scp->dirs * scp->var_cnt); + for (; prev_var_cnt < scp->var_cnt; prev_var_cnt++) + scp->dirs[prev_var_cnt] = direction; } while (token != '.' && token != '/'); - return 1; + return scp; + + error: + destroy_sort_cases_pgm (scp); + return NULL; +} + +void +destroy_sort_cases_pgm (struct sort_cases_pgm *scp) +{ + if (scp != NULL) + { + assert (scp->ref_cnt > 0); + if (--scp->ref_cnt > 0) + return; + + free (scp->vars); + free (scp->dirs); + destroy_internal_sort (scp->isrt); + destroy_external_sort (scp->xsrt); + free (scp); + } } /* Sorts the active file based on the key variables specified in - global variables v_sort and nv_sort. The output is either to the + global variables vars and var_cnt. The output is either to the active file, if SEPARATE is zero, or to a separate file, if SEPARATE is nonzero. In the latter case the output cases can be read with a call to read_sort_output(). (In the former case the output cases should be dealt with through the usual vfm interface.) - The caller is responsible for freeing v_sort[]. */ + The caller is responsible for freeing vars[]. */ int -sort_cases (int separate) +sort_cases (struct sort_cases_pgm *scp, int separate) { - assert (separate_case_tab == NULL); - /* Not sure this is necessary but it's good to be safe. */ - if (separate && vfm_source == &sort_stream) + if (separate && case_source_is_class (vfm_source, &sort_source_class)) procedure (NULL, NULL, NULL, NULL); /* SORT CASES cancels PROCESS IF. */ expr_free (process_if_expr); process_if_expr = NULL; - if (do_internal_sort (separate)) + /* Try an internal sort first. */ + scp->isrt = do_internal_sort (scp, separate); + if (scp->isrt != NULL) + return 1; + + /* Fall back to an external sort. */ + write_active_file_to_disk (); + scp->xsrt = do_external_sort (scp, separate); + if (scp->xsrt != NULL) return 1; - page_to_disk (); - return do_external_sort (separate); + destroy_sort_cases_pgm (scp); + return 0; } + +struct internal_sort + { + struct case_list **results; + }; /* If a reasonable situation is set up, do an internal sort of the data. Return success. */ -static int -do_internal_sort (int separate) +static struct internal_sort * +do_internal_sort (struct sort_cases_pgm *scp, int separate) { - if (vfm_source != &vfm_disk_stream) + struct internal_sort *isrt; + + isrt = xmalloc (sizeof *isrt); + isrt->results = NULL; + + if (!case_source_is_class (vfm_source, &disk_source_class)) { - if (vfm_source != &vfm_memory_stream) + if (!case_source_is_class (vfm_source, &memory_source_class)) procedure (NULL, NULL, NULL, NULL); - if (vfm_source == &vfm_memory_stream) + + if (case_source_is_class (vfm_source, &memory_source_class)) { - struct case_list **case_tab = malloc (sizeof *case_tab - * (vfm_source_info.ncases + 1)); - if (vfm_source_info.ncases == 0) - { - free (case_tab); - return 1; - } - if (case_tab != NULL) - { - struct case_list *clp = memory_source_cases; - struct case_list **ctp = case_tab; - int i; - - for (; clp; clp = clp->next) - *ctp++ = clp; - qsort (case_tab, vfm_source_info.ncases, sizeof *case_tab, - compare_case_lists); - - if (!separate) - { - memory_source_cases = case_tab[0]; - for (i = 1; i < vfm_source_info.ncases; i++) - case_tab[i - 1]->next = case_tab[i]; - case_tab[vfm_source_info.ncases - 1]->next = NULL; - free (case_tab); - } else { - case_tab[vfm_source_info.ncases] = NULL; - separate_case_tab = case_tab; - } - - return 1; - } + struct case_list *case_list; + struct case_list **case_array; + size_t case_cnt; + int i; + + case_cnt = vfm_source_info.ncases; + if (case_cnt == 0) + return isrt; + + if (case_cnt > set_max_workspace / sizeof *case_array) + goto error; + + case_list = memory_source_get_cases (vfm_source); + case_array = malloc (sizeof *case_array * (case_cnt + 1)); + if (case_array == NULL) + goto error; + + for (i = 0; case_list != NULL; i++) + { + case_array[i] = case_list; + case_list = case_list->next; + } + assert (i == case_cnt); + case_array[case_cnt] = NULL; + + sort (case_array, case_cnt, sizeof *case_array, + compare_case_lists, scp); + + if (!separate) + { + memory_source_set_cases (vfm_source, case_array[0]); + for (i = 1; i <= case_cnt; i++) + case_array[i - 1]->next = case_array[i]; + free (case_array); + } + else + isrt->results = case_array; + + return isrt; } } - return 0; + + error: + free (isrt); + return NULL; +} + +static void +destroy_internal_sort (struct internal_sort *isrt) +{ + if (isrt != NULL) + { + free (isrt->results); + free (isrt); + } } -/* Compares the NV_SORT variables in V_SORT[] between the +/* Compares the VAR_CNT variables in VARS[] between the `case_list's at A and B, and returns a strcmp()-type result. */ static int -compare_case_lists (const void *a_, const void *b_) +compare_case_lists (const void *a_, const void *b_, void *scp_) { + struct sort_cases_pgm *scp = scp_; struct case_list *const *pa = a_; struct case_list *const *pb = b_; struct case_list *a = *pa; struct case_list *b = *pb; - struct variable *v; - int result = 0; - int i; - - for (i = 0; i < nv_sort; i++) - { - v = v_sort[i]; - - if (v->type == NUMERIC) - { - double af = a->c.data[v->fv].f; - double bf = b->c.data[v->fv].f; - - result = af < bf ? -1 : af > bf; - } - else - result = memcmp (a->c.data[v->fv].s, b->c.data[v->fv].s, v->width); - - if (result != 0) - break; - } - if (v->p.srt.order == SRT_DESCEND) - result = -result; - return result; + return compare_record (a->c.data, b->c.data, scp); } /* External sort. */ -/* Maximum number of input + output file handles. */ -#if defined FOPEN_MAX && (FOPEN_MAX - 5 < 18) -#define MAX_FILE_HANDLES (FOPEN_MAX - 5) -#else -#define MAX_FILE_HANDLES 18 -#endif - -#if MAX_FILE_HANDLES < 3 -#error At least 3 file handles must be available for sorting. -#endif - -/* Number of input buffers. */ -#define N_INPUT_BUFFERS (MAX_FILE_HANDLES - 1) - -/* Maximum order of merge. This is the value suggested by Knuth; - specifically, he said to use tree selection, which we don't - implement, for larger orders of merge. */ +/* Maximum order of merge. If you increase this, then you should + use a heap for comparing cases during merge. */ #define MAX_MERGE_ORDER 7 /* Minimum total number of records for buffers. */ #define MIN_BUFFER_TOTAL_SIZE_RECS 64 -/* Minimum single input or output buffer size, in bytes and records. */ +/* Minimum single input buffer size, in bytes and records. */ #define MIN_BUFFER_SIZE_BYTES 4096 #define MIN_BUFFER_SIZE_RECS 16 -/* Structure for replacement selection tree. */ -struct repl_sel_tree +#if MIN_BUFFER_SIZE_RECS * 2 + 16 > MIN_BUFFER_TOTAL_SIZE_RECS +#error MIN_BUFFER_SIZE_RECS and MIN_BUFFER_TOTAL_SIZE_RECS do not make sense. +#endif + +/* An initial run and its length. */ +struct initial_run { - struct repl_sel_tree *loser;/* Loser associated w/this internal node. */ - int rn; /* Run number of `loser'. */ - struct repl_sel_tree *fe; /* Internal node above this external node. */ - struct repl_sel_tree *fi; /* Internal node above this internal node. */ - union value record[1]; /* The case proper. */ + int file_idx; /* File index. */ + size_t case_cnt; /* Number of cases. */ }; -/* Static variables used for sorting. */ -static struct repl_sel_tree **x; /* Buffers. */ -static int x_max; /* Size of buffers, in records. */ -static int records_per_buffer; /* Number of records in each buffer. */ - -/* In the merge phase, the first N_INPUT_BUFFERS handle[] elements are - input files and the last element is the output file. Before that, - they're all used as output files, although the last one is - segregated. */ -static FILE *handle[MAX_FILE_HANDLES]; /* File handles. */ - -/* Now, MAX_FILE_HANDLES is the maximum number of files we will *try* - to open. But if we can't open that many, max_handles will be set - to the number we apparently can open. */ -static int max_handles; /* Maximum number of handles. */ - -/* When we create temporary files, they are all put in the same - directory and numbered sequentially from zero. tmp_basename is the - drive/directory, etc., and tmp_extname can be sprintf() with "%08x" - to the file number, then tmp_basename used to open the file. */ -static char *tmp_basename; /* Temporary file basename. */ -static char *tmp_extname; /* Temporary file extension name. */ - -/* We use Huffman's method to determine the merge pattern. This means - that we need to know which runs are the shortest at any given time. - Priority queues as implemented by heap.c are a natural for this - task (probably because I wrote the code specifically for it). */ -static struct heap *huffman_queue; /* Huffman priority queue. */ - -/* Prototypes for helper functions. */ -static void sort_stream_write (void); -static int write_initial_runs (int separate); -static int allocate_cases (void); -static int allocate_file_handles (void); -static int merge (void); -static void rmdir_temp_dir (void); - -/* Performs an external sort of the active file. A description of the - procedure follows. All page references refer to Knuth's _Art of - Computer Programming, Vol. 3: Sorting and Searching_, which is the - canonical resource for sorting. - - 1. The data is read and S initial runs are formed through the - action of algorithm 5.4.1R (replacement selection). - - 2. Huffman's method (p. 365-366) is used to determine the optimum - merge pattern. - - 3. If an OS that supports overlapped reading, writing, and - computing is being run, we should use 5.4.6F for forecasting. - Otherwise, buffers are filled only when they run out of data. - FIXME: Since the author of PSPP uses GNU/Linux, which does not - yet implement overlapped r/w/c, 5.4.6F is not used. - - 4. We perform P-way merges: - - (a) The desired P is the smallest P such that ceil(ln(S)/ln(P)) - is minimized. (FIXME: Since I don't have an algorithm for - minimizing this, it's just set to MAX_MERGE_ORDER.) - - (b) P is reduced if the selected value would make input buffers - less than 4096 bytes each, or 16 records, whichever is larger. - - (c) P is reduced if we run out of available file handles or space - for file handles. - - (d) P is reduced if we don't have space for one or two output - buffers, which have the same minimum size as input buffers. (We - need two output buffers if 5.4.6F is in use for forecasting.) */ +/* Sorts initial runs A and B in decending order by length. */ static int -do_external_sort (int separate) +compare_initial_runs (const void *a_, const void *b_, void *aux UNUSED) { - int success = 0; - - assert (MAX_FILE_HANDLES >= 3); - - x = NULL; - tmp_basename = NULL; - - huffman_queue = heap_create (512); - if (huffman_queue == NULL) - return 0; - - if (!allocate_cases ()) - goto lossage; + const struct initial_run *a = a_; + const struct initial_run *b = b_; + + return a->case_cnt > b->case_cnt ? -1 : a->case_cnt case_cnt; +} - if (!allocate_file_handles ()) - goto lossage; +struct external_sort + { + struct sort_cases_pgm *scp; /* SORT CASES info. */ + struct initial_run *initial_runs; /* Array of initial runs. */ + size_t run_cnt, run_cap; /* Number of runs, allocated capacity. */ + char *temp_dir; /* Temporary file directory name. */ + int next_file_idx; /* Lowest unused file index. */ + }; - if (!write_initial_runs (separate)) - goto lossage; +/* Prototypes for helper functions. */ +static void sort_sink_write (struct case_sink *, struct ccase *); +static int write_initial_runs (struct external_sort *, int separate); +static int init_external_sort (struct external_sort *); +static int merge (struct external_sort *); +static void rmdir_temp_dir (struct external_sort *); +static void remove_temp_file (struct external_sort *xsrt, int file_idx); + +/* Performs an external sort of the active file according to the + specification in SCP. Forms initial runs using a heap as a + reservoir. Determines the optimum merge pattern via Huffman's + method (see Knuth vol. 3, 2nd edition, p. 365-366), and merges + according to that pattern. */ +static struct external_sort * +do_external_sort (struct sort_cases_pgm *scp, int separate) +{ + struct external_sort *xsrt; + int success = 0; - merge (); + xsrt = xmalloc (sizeof *xsrt); + xsrt->scp = scp; + if (!init_external_sort (xsrt)) + goto done; + if (!write_initial_runs (xsrt, separate)) + goto done; + if (!merge (xsrt)) + goto done; success = 1; - /* Despite the name, flow of control comes here regardless of - whether or not the sort is successful. */ -lossage: - heap_destroy (huffman_queue); - - if (x) + done: + if (success) { - int i; - - for (i = 0; i <= x_max; i++) - free (x[i]); - free (x); + /* Don't destroy anything because we'll need it for reading + the output. */ + return xsrt; + } + else + { + destroy_external_sort (xsrt); + return NULL; } - - if (!success) - rmdir_temp_dir (); - - return success; } -#if !HAVE_GETPID -#define getpid() (0) -#endif - -/* Sets up to open temporary files. */ -/* PORTME: This creates a directory for temporary files. Some OSes - might not have that concept... */ -static int -allocate_file_handles (void) +/* Destroys XSRT. */ +static void +destroy_external_sort (struct external_sort *xsrt) { - const char *dir; /* Directory prefix. */ - char *buf; /* String buffer. */ - char *cp; /* Pointer into buf. */ - - dir = getenv ("SPSSTMPDIR"); - if (dir == NULL) - dir = getenv ("SPSSXTMPDIR"); - if (dir == NULL) - dir = getenv ("TMPDIR"); -#ifdef P_tmpdir - if (dir == NULL) - dir = P_tmpdir; -#endif -#ifdef unix - if (dir == NULL) - dir = "/tmp"; -#elif defined (__MSDOS__) - if (dir == NULL) - dir = getenv ("TEMP"); - if (dir == NULL) - dir = getenv ("TMP"); - if (dir == NULL) - dir = "\\"; -#else - dir = ""; -#endif - - buf = xmalloc (strlen (dir) + 1 + 4 + 8 + 4 + 1 + INT_DIGITS + 1); - cp = spprintf (buf, "%s%c%04lX%04lXpspp", dir, DIR_SEPARATOR, - ((long) time (0)) & 0xffff, ((long) getpid ()) & 0xffff); -#ifndef __MSDOS__ - if (-1 == mkdir (buf, S_IRWXU)) -#else - if (-1 == mkdir (buf)) -#endif + if (xsrt != NULL) { - free (buf); - msg (SE, _("%s: Cannot create temporary directory: %s."), - buf, strerror (errno)); - return 0; + int i; + + for (i = 0; i < xsrt->run_cnt; i++) + remove_temp_file (xsrt, xsrt->initial_runs[i].file_idx); + rmdir_temp_dir (xsrt); + free (xsrt->initial_runs); + free (xsrt); } - *cp++ = DIR_SEPARATOR; - - tmp_basename = buf; - tmp_extname = cp; - - max_handles = MAX_FILE_HANDLES; - - return 1; } -/* Removes the directory created for temporary files, if one exists. - Also frees tmp_basename. */ -static void -rmdir_temp_dir (void) +#ifdef HAVE_MKDTEMP +/* Creates and returns the name of a temporary directory. */ +static char * +make_temp_dir (void) { - if (NULL == tmp_basename) - return; + const char *parent_dir; + char *temp_dir; - tmp_extname[-1] = '\0'; - if (rmdir (tmp_basename) == -1) - msg (SE, _("%s: Error removing directory for temporary files: %s."), - tmp_basename, strerror (errno)); + if (getenv ("TMPDIR") != NULL) + parent_dir = getenv ("TMPDIR"); + else + parent_dir = P_tmpdir; - free (tmp_basename); + temp_dir = xmalloc (strlen (parent_dir) + 32); + sprintf (temp_dir, "%s%cpsppXXXXXX", parent_dir, DIR_SEPARATOR); + if (mkdtemp (temp_dir) == NULL) + { + msg (SE, _("%s: Creating temporary directory: %s."), + temp_dir, strerror (errno)); + free (temp_dir); + return NULL; + } + else + return temp_dir; } - -/* Allocates room for lots of cases as a buffer. */ +#else /* !HAVE_MKDTEMP */ +/* Creates directory DIR. */ static int -allocate_cases (void) +do_mkdir (const char *dir) { - /* This is the size of one case. */ - const int case_size = (sizeof (struct repl_sel_tree) - + dict_get_case_size (default_dict) - - sizeof (union value) - + sizeof (struct repl_sel_tree *)); - - x = NULL; - - /* Allocate as many cases as we can, assuming a space of four - void pointers for malloc()'s internal bookkeeping. */ - x_max = MAX_WORKSPACE / (case_size + 4 * sizeof (void *)); - x = malloc (sizeof (struct repl_sel_tree *) * x_max); - if (x != NULL) - { - int i; +#ifndef __MSDOS__ + return mkdir (dir, S_IRWXU); +#else + return mkdir (dir); +#endif +} - for (i = 0; i < x_max; i++) - { - x[i] = malloc (sizeof (struct repl_sel_tree) - + dict_get_case_size (default_dict) - - sizeof (union value)); - if (x[i] == NULL) - break; - } - x_max = i; - } - if (x == NULL || x_max < MIN_BUFFER_TOTAL_SIZE_RECS) +/* Creates and returns the name of a temporary directory. */ +static char * +make_temp_dir (void) +{ + int i; + + for (i = 0; i < 100; i++) { - if (x != NULL) - { - int i; - - for (i = 0; i < x_max; i++) - free (x[i]); - } - free (x); - msg (SE, _("Out of memory. Could not allocate room for minimum of %d " - "cases of %d bytes each. (PSPP workspace is currently " - "restricted to a maximum of %d KB.)"), - MIN_BUFFER_TOTAL_SIZE_RECS, case_size, MAX_WORKSPACE / 1024); - x_max = 0; - x = NULL; - return 0; + char temp_dir[L_tmpnam + 1]; + if (tmpnam (temp_dir) == NULL) + { + msg (SE, _("Generating temporary directory name failed: %s."), + strerror (errno)); + return NULL; + } + else if (do_mkdir (temp_dir) == 0) + return xstrdup (temp_dir); } + + msg (SE, _("Creating temporary directory failed: %s."), strerror (errno)); + return NULL; +} +#endif /* !HAVE_MKDTEMP */ - /* The last element of the array is used to store lastkey. */ - x_max--; +/* Sets up to open temporary files. */ +static int +init_external_sort (struct external_sort *xsrt) +{ + /* Zero. */ + xsrt->temp_dir = NULL; + xsrt->next_file_idx = 0; + + /* Huffman queue. */ + xsrt->run_cap = 512; + xsrt->run_cnt = 0; + xsrt->initial_runs = xmalloc (sizeof *xsrt->initial_runs * xsrt->run_cap); + + /* Temporary directory. */ + xsrt->temp_dir = make_temp_dir (); + if (xsrt->temp_dir == NULL) + return 0; - debug_printf ((_("allocated %d cases == %d bytes\n"), - x_max, x_max * case_size)); return 1; } - -/* Replacement selection. */ - -static int rmax, rc, rq; -static struct repl_sel_tree *q; -static union value *lastkey; -static int run_no, file_index; -static int deferred_abort; -static int run_length; -static int compare_record (union value *, union value *); -static inline void -output_record (union value *v) +static int +simulate_error (void) { - union value *src_case; + static int op_err_cnt = -1; + static int op_cnt; - if (deferred_abort) - return; - - if (compaction_necessary) - { - compact_case (compaction_case, (struct ccase *) v); - src_case = (union value *) compaction_case; - } + if (op_err_cnt == -1 || op_cnt++ < op_err_cnt) + return 0; else - src_case = (union value *) v; - - if ((int) fwrite (src_case, sizeof *src_case, compaction_nval, - handle[file_index]) - != compaction_nval) { - deferred_abort = 1; - sprintf (tmp_extname, "%08x", run_no); - msg (SE, _("%s: Error writing temporary file: %s."), - tmp_basename, strerror (errno)); - return; + errno = 0; + return 1; } - - run_length++; } -static int -close_handle (int i) +/* Removes the directory created for temporary files, if one + exists. */ +static void +rmdir_temp_dir (struct external_sort *xsrt) { - int result = fclose (handle[i]); - msg (VM (2), _("SORT: Closing handle %d."), i); - - handle[i] = NULL; - if (EOF == result) + if (xsrt->temp_dir != NULL && rmdir (xsrt->temp_dir) == -1) { - sprintf (tmp_extname, "%08x", i); - msg (SE, _("%s: Error closing temporary file: %s."), - tmp_basename, strerror (errno)); - return 0; + msg (SE, _("%s: Error removing directory for temporary files: %s."), + xsrt->temp_dir, strerror (errno)); + xsrt->temp_dir = NULL; } - return 1; } -static int -close_handles (int beg, int end) +#define TEMP_FILE_NAME_SIZE (L_tmpnam + 32) +static void +get_temp_file_name (struct external_sort *xsrt, int file_idx, + char filename[TEMP_FILE_NAME_SIZE]) { - int success = 1; - int i; - - for (i = beg; i < end; i++) - success &= close_handle (i); - return success; + assert (xsrt->temp_dir != NULL); + sprintf (filename, "%s%c%04d", xsrt->temp_dir, DIR_SEPARATOR, file_idx); } -static int -open_handle_w (int handle_no, int run_no) +static FILE * +open_temp_file (struct external_sort *xsrt, int file_idx, const char *mode) { - sprintf (tmp_extname, "%08x", run_no); - msg (VM (1), _("SORT: %s: Opening for writing as run %d."), - tmp_basename, run_no); - - /* The `x' modifier causes the GNU C library to insist on creating a - new file--if the file already exists, an error is signaled. The - ANSI C standard says that other libraries should ignore anything - after the `w+b', so it shouldn't be a problem. */ - return NULL != (handle[handle_no] = fopen (tmp_basename, "w+bx")); + char temp_file[TEMP_FILE_NAME_SIZE]; + FILE *file; + + get_temp_file_name (xsrt, file_idx, temp_file); + + file = fopen (temp_file, mode); + if (simulate_error () || file == NULL) + msg (SE, _("%s: Error opening temporary file for %s: %s."), + temp_file, mode[0] == 'r' ? "reading" : "writing", + strerror (errno)); + + return file; } static int -open_handle_r (int handle_no, int run_no) +close_temp_file (struct external_sort *xsrt, int file_idx, FILE *file) { - FILE *f; - - sprintf (tmp_extname, "%08x", run_no); - msg (VM (1), _("SORT: %s: Opening for writing as run %d."), - tmp_basename, run_no); - f = handle[handle_no] = fopen (tmp_basename, "rb"); - - if (f == NULL) + if (file != NULL) { - msg (SE, _("%s: Error opening temporary file for reading: %s."), - tmp_basename, strerror (errno)); - return 0; + char temp_file[TEMP_FILE_NAME_SIZE]; + get_temp_file_name (xsrt, file_idx, temp_file); + if (simulate_error () || fclose (file) == EOF) + { + msg (SE, _("%s: Error closing temporary file: %s."), + temp_file, strerror (errno)); + return 0; + } } - return 1; } -/* Begins a new initial run, specifically its output file. */ static void -begin_run (void) +remove_temp_file (struct external_sort *xsrt, int file_idx) { - /* Decide which handle[] to use. If run_no is max_handles or - greater, then we've run out of handles so it's time to just do - one file at a time, which by default is handle 0. */ - file_index = (run_no < max_handles ? run_no : 0); - run_length = 0; - - /* Alright, now create the temporary file. */ - if (open_handle_w (file_index, run_no) == 0) + if (file_idx != -1) { - /* Failure to create the temporary file. Check if there are - unacceptably few files already open. */ - if (file_index < 3) - { - deferred_abort = 1; - msg (SE, _("%s: Error creating temporary file: %s."), - tmp_basename, strerror (errno)); - return; - } - - /* Close all the open temporary files. */ - if (!close_handles (0, file_index)) - return; - - /* Now try again to create the temporary file. */ - max_handles = file_index; - file_index = 0; - if (open_handle_w (0, run_no) == 0) - { - /* It still failed, report it this time. */ - deferred_abort = 1; - msg (SE, _("%s: Error creating temporary file: %s."), - tmp_basename, strerror (errno)); - return; - } + char temp_file[TEMP_FILE_NAME_SIZE]; + get_temp_file_name (xsrt, file_idx, temp_file); + if (simulate_error () || remove (temp_file) != 0) + msg (SE, _("%s: Error removing temporary file: %s."), + temp_file, strerror (errno)); } } -/* Ends the current initial run. Just increments run_no if no initial - run has been started yet. */ -static void -end_run (void) +static int +write_temp_file (struct external_sort *xsrt, int file_idx, + FILE *file, const void *data, size_t size) { - /* Close file handles if necessary. */ - { - int result; - - if (run_no == max_handles - 1) - result = close_handles (0, max_handles); - else if (run_no >= max_handles) - result = close_handle (0); - else - result = 1; - if (!result) - deferred_abort = 1; - } - - /* Advance to next run. */ - run_no++; - if (run_no) - heap_insert (huffman_queue, run_no - 1, run_length); + if (!simulate_error () && fwrite (data, size, 1, file) == 1) + return 1; + else + { + char temp_file[TEMP_FILE_NAME_SIZE]; + get_temp_file_name (xsrt, file_idx, temp_file); + msg (SE, _("%s: Error writing temporary file: %s."), + temp_file, strerror (errno)); + return 0; + } } -/* Performs 5.4.1R. */ static int -write_initial_runs (int separate) +read_temp_file (struct external_sort *xsrt, int file_idx, + FILE *file, void *data, size_t size) { - run_no = -1; - deferred_abort = 0; - - /* Steps R1, R2, R3. */ - rmax = 0; - rc = 0; - lastkey = NULL; - q = x[0]; - rq = 0; + if (!simulate_error () && fread (data, size, 1, file) == 1) + return 1; + else + { + char temp_file[TEMP_FILE_NAME_SIZE]; + get_temp_file_name (xsrt, file_idx, temp_file); + if (ferror (file)) + msg (SE, _("%s: Error reading temporary file: %s."), + temp_file, strerror (errno)); + else + msg (SE, _("%s: Unexpected end of temporary file."), + temp_file); + return 0; + } +} + +/* Replacement selection. */ + +/* Pairs a record with a run number. */ +struct record_run { - int j; + int run; /* Run number of case. */ + struct case_list *record; /* Case data. */ + }; - for (j = 0; j < x_max; j++) - { - struct repl_sel_tree *J = x[j]; +struct initial_run_state + { + struct external_sort *xsrt; + + /* Reservoir. */ + struct record_run *records; /* Records arranged as a heap. */ + size_t record_cnt; /* Current number of records. */ + size_t record_cap; /* Capacity for records. */ + struct case_list *free_list;/* Cases not in heap. */ + + /* Run currently being output. */ + int file_idx; /* Temporary file number. */ + size_t case_cnt; /* Number of cases so far. */ + FILE *output_file; /* Output file. */ + struct case_list *last_output;/* Record last output. */ + + int okay; /* Zero if an error has been encountered. */ + }; - J->loser = J; - J->rn = 0; - J->fe = x[(x_max + j) / 2]; - J->fi = x[j / 2]; - memset (J->record, 0, dict_get_case_size (default_dict)); - } - } +static void destroy_initial_run_state (struct initial_run_state *irs); +static int allocate_cases (struct initial_run_state *); +static struct case_list *grab_case (struct initial_run_state *); +static void release_case (struct initial_run_state *, struct case_list *); +static void output_record (struct initial_run_state *irs); +static void start_run (struct initial_run_state *irs); +static void end_run (struct initial_run_state *irs); +static int compare_record_run (const struct record_run *, + const struct record_run *, + struct sort_cases_pgm *); +static int compare_record_run_minheap (const void *, const void *, void *); - /* Most of the iterations of steps R4, R5, R6, R7, R2, R3, ... */ +static int +write_initial_runs (struct external_sort *xsrt, int separate) +{ + struct initial_run_state *irs; + int success = 0; + + /* Allocate memory for cases. */ + irs = xmalloc (sizeof *irs); + irs->xsrt = xsrt; + irs->records = NULL; + irs->record_cnt = irs->record_cap = 0; + irs->free_list = NULL; + irs->output_file = NULL; + irs->last_output = NULL; + irs->file_idx = 0; + irs->case_cnt = 0; + irs->okay = 1; + if (!allocate_cases (irs)) + goto done; + + /* Create case sink. */ if (!separate) { if (vfm_sink) - vfm_sink->destroy_sink (); - vfm_sink = &sort_stream; + vfm_sink->class->destroy (vfm_sink); + vfm_sink = create_case_sink (&sort_sink_class, irs); + xsrt->scp->ref_cnt++; } + + /* Create initial runs. */ + start_run (irs); procedure (NULL, NULL, NULL, NULL); + while (irs->record_cnt > 0 && irs->okay) + output_record (irs); + end_run (irs); - /* Final iterations of steps R4, R5, R6, R7, R2, R3, ... */ - for (;;) - { - struct repl_sel_tree *t; + success = irs->okay; - /* R4. */ - rq = rmax + 1; + done: + destroy_initial_run_state (irs); - /* R5. */ - t = q->fe; + return success; +} - /* R6 and R7. */ - for (;;) - { - /* R6. */ - if (t->rn < rq - || (t->rn == rq - && compare_record (t->loser->record, q->record) < 0)) - { - struct repl_sel_tree *temp_tree; - int temp_int; +/* Add a single case to an initial run. */ +static void +sort_sink_write (struct case_sink *sink, struct ccase *c) +{ + struct initial_run_state *irs = sink->aux; + struct record_run *new_record_run; - temp_tree = t->loser; - t->loser = q; - q = temp_tree; + if (!irs->okay) + return; - temp_int = t->rn; - t->rn = rq; - rq = temp_int; - } + /* Compose record_run for this run and add to heap. */ + assert (irs->record_cnt < irs->record_cap); + new_record_run = irs->records + irs->record_cnt++; + new_record_run->record = grab_case (irs); + memcpy (new_record_run->record->c.data, c->data, vfm_sink_info.case_size); + new_record_run->run = irs->file_idx; + if (irs->last_output != NULL + && compare_record (c->data, irs->last_output->c.data, + irs->xsrt->scp) < 0) + new_record_run->run = irs->xsrt->next_file_idx; + push_heap (irs->records, irs->record_cnt, sizeof *irs->records, + compare_record_run_minheap, irs->xsrt->scp); + + /* Output a record if the reservoir is full. */ + if (irs->record_cnt == irs->record_cap && irs->okay) + output_record (irs); +} - /* R7. */ - if (t == x[1]) - break; - t = t->fi; - } +static void +destroy_initial_run_state (struct initial_run_state *irs) +{ + struct case_list *iter, *next; + int i; - /* R2. */ - if (rq != rc) - { - end_run (); - if (rq > rmax) - break; - begin_run (); - rc = rq; - } + if (irs == NULL) + return; - /* R3. */ - if (rq != 0) - { - output_record (q->record); - lastkey = x[x_max]->record; - memcpy (lastkey, q->record, sizeof (union value) * vfm_sink_info.nval); - } - } - assert (run_no == rmax); + /* Release cases to free list. */ + for (i = 0; i < irs->record_cnt; i++) + release_case (irs, irs->records[i].record); + if (irs->last_output != NULL) + release_case (irs, irs->last_output); - /* If an unrecoverable error occurred somewhere in the above code, - then the `deferred_abort' flag would have been set. */ - if (deferred_abort) + /* Free cases in free list. */ + for (iter = irs->free_list; iter != NULL; iter = next) { - int i; + next = iter->next; + free (iter); + } + + free (irs->records); + free (irs); + + if (irs->output_file != NULL) + close_temp_file (irs->xsrt, irs->file_idx, irs->output_file); +} - for (i = 0; i < max_handles; i++) - if (handle[i] != NULL) - { - sprintf (tmp_extname, "%08x", i); +/* Allocates room for lots of cases as a buffer. */ +static int +allocate_cases (struct initial_run_state *irs) +{ + size_t case_size; /* Size of one case, in bytes. */ + int approx_case_cost; /* Approximate memory cost of one case in bytes. */ + int max_cases; /* Maximum number of cases to allocate. */ + int i; - if (fclose (handle[i]) == EOF) - msg (SE, _("%s: Error closing temporary file: %s."), - tmp_basename, strerror (errno)); + /* Allocate as many cases as we can within the workspace + limit. */ + case_size = dict_get_case_size (default_dict); + approx_case_cost = (sizeof *irs->records + + sizeof *irs->free_list + + case_size + + 4 * sizeof (void *)); + max_cases = set_max_workspace / approx_case_cost; + irs->records = malloc (sizeof *irs->records * max_cases); + for (i = 0; i < max_cases; i++) + { + struct case_list *c; + c = malloc (sizeof *c + case_size - sizeof (union value)); + if (c == NULL) + { + max_cases = i; + break; + } + release_case (irs, c); + } - if (remove (tmp_basename) != 0) - msg (SE, _("%s: Error removing temporary file: %s."), - tmp_basename, strerror (errno)); + /* irs->records gets all but one of the allocated cases. + The extra is used for last_output. */ + irs->record_cap = max_cases - 1; - handle[i] = NULL; - } + /* Fail if we didn't allocate an acceptable number of cases. */ + if (irs->records == NULL || max_cases < MIN_BUFFER_TOTAL_SIZE_RECS) + { + msg (SE, _("Out of memory. Could not allocate room for minimum of %d " + "cases of %d bytes each. (PSPP workspace is currently " + "restricted to a maximum of %d KB.)"), + MIN_BUFFER_TOTAL_SIZE_RECS, approx_case_cost, set_max_workspace / 1024); return 0; } - return 1; } -/* Compares the NV_SORT variables in V_SORT[] between the `value's at +/* Compares the VAR_CNT variables in VARS[] between the `value's at A and B, and returns a strcmp()-type result. */ static int -compare_record (union value * a, union value * b) +compare_record (const union value *a, const union value *b, + const struct sort_cases_pgm *scp) { int i; - int result = 0; - struct variable *v; assert (a != NULL); - if (b == NULL) /* Sort NULLs after everything else. */ - return -1; - - for (i = 0; i < nv_sort; i++) + assert (b != NULL); + + for (i = 0; i < scp->var_cnt; i++) { - v = v_sort[i]; + struct variable *v = scp->vars[i]; + int fv = v->fv; + int result; if (v->type == NUMERIC) - { - if (a[v->fv].f != b[v->fv].f) - { - result = (a[v->fv].f > b[v->fv].f) ? 1 : -1; - break; - } - } + { + double af = a[fv].f; + double bf = b[fv].f; + + result = af < bf ? -1 : af > bf; + } else - { - result = memcmp (a[v->fv].s, b[v->fv].s, v->width); - if (result != 0) - break; - } + result = memcmp (a[fv].s, b[fv].s, v->width); + + if (result != 0) + { + if (scp->dirs[i] == SRT_DESCEND) + result = -result; + return result; + } } - if (v->p.srt.order == SRT_ASCEND) - return result; + return 0; +} + +static int +compare_record_run (const struct record_run *a, + const struct record_run *b, + struct sort_cases_pgm *scp) +{ + if (a->run != b->run) + return a->run > b->run ? 1 : -1; else + return compare_record (a->record->c.data, b->record->c.data, scp); +} + +static int +compare_record_run_minheap (const void *a, const void *b, void *scp) +{ + return -compare_record_run (a, b, scp); +} + +/* Begins a new initial run, specifically its output file. */ +static void +start_run (struct initial_run_state *irs) +{ + irs->file_idx = irs->xsrt->next_file_idx++; + irs->case_cnt = 0; + irs->output_file = open_temp_file (irs->xsrt, irs->file_idx, "wb"); + if (irs->output_file == NULL) + irs->okay = 0; + if (irs->last_output != NULL) { - assert (v->p.srt.order == SRT_DESCEND); - return -result; + release_case (irs, irs->last_output); + irs->last_output = NULL; } } - -/* Merging. */ -static int merge_once (int run_index[], int run_length[], int n_runs); +/* Ends the current initial run. */ +static void +end_run (struct initial_run_state *irs) +{ + struct external_sort *xsrt = irs->xsrt; + + /* Record initial run. */ + if (xsrt->run_cnt >= xsrt->run_cap) + { + xsrt->run_cap *= 2; + xsrt->initial_runs + = xrealloc (xsrt->initial_runs, + sizeof *xsrt->initial_runs * xsrt->run_cap); + } + xsrt->initial_runs[xsrt->run_cnt].file_idx = irs->file_idx; + xsrt->initial_runs[xsrt->run_cnt].case_cnt = irs->case_cnt; + xsrt->run_cnt++; + + /* Close file handle. */ + if (irs->output_file != NULL + && !close_temp_file (irs->xsrt, irs->file_idx, irs->output_file)) + irs->okay = 0; + irs->output_file = NULL; +} -/* Modula function as defined by Knuth. */ -static int -mod (int x, int y) +static void +output_record (struct initial_run_state *irs) { - int result; + struct record_run *record_run; + struct ccase *out_case; + + /* Extract minimum case from heap. */ + assert (irs->record_cnt > 0); + pop_heap (irs->records, irs->record_cnt--, sizeof *irs->records, + compare_record_run_minheap, irs->xsrt->scp); + record_run = irs->records + irs->record_cnt; + + /* Bail if an error has occurred. */ + if (!irs->okay) + return; - if (y == 0) - return x; - result = abs (x) % abs (y); - if (y < 0) - result = -result; - return result; + /* Obtain case data to write to disk. */ + out_case = &record_run->record->c; + if (compaction_necessary) + { + compact_case (compaction_case, out_case); + out_case = compaction_case; + } + + /* Start new run if necessary. */ + assert (record_run->run == irs->file_idx + || record_run->run == irs->xsrt->next_file_idx); + if (record_run->run != irs->file_idx) + { + end_run (irs); + start_run (irs); + } + assert (record_run->run == irs->file_idx); + irs->case_cnt++; + + /* Write to disk. */ + if (irs->output_file != NULL + && !write_temp_file (irs->xsrt, irs->file_idx, irs->output_file, + out_case->data, + sizeof *out_case->data * compaction_nval)) + irs->okay = 0; + + /* This record becomes last_output. */ + if (irs->last_output != NULL) + release_case (irs, irs->last_output); + irs->last_output = record_run->record; } -/* Performs a series of P-way merges of initial runs using Huffman's +static struct case_list * +grab_case (struct initial_run_state *irs) +{ + struct case_list *c; + + assert (irs != NULL); + assert (irs->free_list != NULL); + + c = irs->free_list; + irs->free_list = c->next; + return c; +} + +static void +release_case (struct initial_run_state *irs, struct case_list *c) +{ + assert (irs != NULL); + assert (c != NULL); + + c->next = irs->free_list; + irs->free_list = c; +} + +/* Merging. */ + +struct merge_state + { + struct external_sort *xsrt; /* External sort state. */ + struct ccase **cases; /* Buffers. */ + size_t case_cnt; /* Number of buffers. */ + }; + +struct run; +static int merge_once (struct merge_state *, + const struct initial_run[], size_t, + struct initial_run *); +static int fill_run_buffer (struct merge_state *, struct run *); +static int mod (int, int); + +/* Performs a series of P-way merges of initial runs method. */ static int -merge (void) +merge (struct external_sort *xsrt) { - /* Order of merge. */ - int order; + struct merge_state mrg; /* State of merge. */ + size_t case_size; /* Size of one case, in bytes. */ + size_t approx_case_cost; /* Approximate memory cost of one case. */ + int max_order; /* Maximum order of merge. */ + size_t dummy_run_cnt; /* Number of dummy runs to insert. */ + int success = 0; + int i; - /* Idiot check. */ - assert (MIN_BUFFER_SIZE_RECS * 2 <= MIN_BUFFER_TOTAL_SIZE_RECS - 1); + mrg.xsrt = xsrt; - /* Close all the input files. I hope that the boundary conditions - are correct on this but I'm not sure. */ - if (run_no < max_handles) + /* Allocate as many cases as possible into cases. */ + case_size = dict_get_case_size (default_dict); + approx_case_cost = sizeof *mrg.cases + case_size + 4 * sizeof (void *); + mrg.case_cnt = set_max_workspace / approx_case_cost; + mrg.cases = malloc (sizeof *mrg.cases * mrg.case_cnt); + if (mrg.cases == NULL) + goto done; + for (i = 0; i < mrg.case_cnt; i++) { - int i; - - for (i = 0; i < run_no; ) - if (!close_handle (i++)) - { - for (; i < run_no; i++) - close_handle (i); - return 0; - } + mrg.cases[i] = malloc (case_size); + if (mrg.cases[i] == NULL) + { + mrg.case_cnt = i; + break; + } + } + if (mrg.case_cnt < MIN_BUFFER_TOTAL_SIZE_RECS) + { + msg (SE, _("Out of memory. Could not allocate room for minimum of %d " + "cases of %d bytes each. (PSPP workspace is currently " + "restricted to a maximum of %d KB.)"), + MIN_BUFFER_TOTAL_SIZE_RECS, approx_case_cost, set_max_workspace / 1024); + return 0; } - /* Determine order of merge. */ - order = MAX_MERGE_ORDER; - if (x_max / order < MIN_BUFFER_SIZE_RECS) - order = x_max / MIN_BUFFER_SIZE_RECS; - else if (x_max / order * dict_get_case_size (default_dict) - < MIN_BUFFER_SIZE_BYTES) - order = x_max / (MIN_BUFFER_SIZE_BYTES - / dict_get_case_size (default_dict)); - - /* Make sure the order of merge is bounded. */ - if (order < 2) - order = 2; - if (order > rmax) - order = rmax; - assert (x_max / order > 0); - - /* Calculate number of records per buffer. */ - records_per_buffer = x_max / order; - - /* Add (1 - S) mod (P - 1) dummy runs of length 0. */ - { - int n_dummy_runs = mod (1 - rmax, order - 1); - debug_printf (("rmax=%d, order=%d, n_dummy_runs=%d\n", - rmax, order, n_dummy_runs)); - assert (n_dummy_runs >= 0); - while (n_dummy_runs--) - { - heap_insert (huffman_queue, -2, 0); - rmax++; - } - } + /* Determine maximum order of merge. */ + max_order = MAX_MERGE_ORDER; + if (mrg.case_cnt / max_order < MIN_BUFFER_SIZE_RECS) + max_order = mrg.case_cnt / MIN_BUFFER_SIZE_RECS; + else if (mrg.case_cnt / max_order * case_size < MIN_BUFFER_SIZE_BYTES) + max_order = mrg.case_cnt / (MIN_BUFFER_SIZE_BYTES / case_size); + if (max_order < 2) + max_order = 2; + if (max_order > xsrt->run_cnt) + max_order = xsrt->run_cnt; /* Repeatedly merge the P shortest existing runs until only one run is left. */ - while (rmax > 1) + make_heap (xsrt->initial_runs, xsrt->run_cnt, sizeof *xsrt->initial_runs, + compare_initial_runs, NULL); + dummy_run_cnt = mod (1 - (int) xsrt->run_cnt, max_order - 1); + assert (max_order == 1 + || (xsrt->run_cnt + dummy_run_cnt) % (max_order - 1) == 1); + while (xsrt->run_cnt > 1) { - int run_index[MAX_MERGE_ORDER]; - int run_length[MAX_MERGE_ORDER]; - int total_run_length = 0; + struct initial_run output_run; + int order; int i; - assert (rmax >= order); - - /* Find the shortest runs; put them in runs[] in reverse order - of length, to force dummy runs of length 0 to the end of the - list. */ - debug_printf ((_("merging runs"))); - for (i = order - 1; i >= 0; i--) - { - run_index[i] = heap_delete (huffman_queue, &run_length[i]); - assert (run_index[i] != -1); - total_run_length += run_length[i]; - debug_printf ((" %d(%d)", run_index[i], run_length[i])); - } - debug_printf ((_(" into run %d(%d)\n"), run_no, total_run_length)); - - if (!merge_once (run_index, run_length, order)) - { - int index; - - while (-1 != (index = heap_delete (huffman_queue, NULL))) - { - sprintf (tmp_extname, "%08x", index); - if (remove (tmp_basename) != 0) - msg (SE, _("%s: Error removing temporary file: %s."), - tmp_basename, strerror (errno)); - } - - return 0; - } + /* Choose order of merge (max_order after first merge). */ + order = max_order - dummy_run_cnt; + dummy_run_cnt = 0; + + /* Choose runs to merge. */ + assert (xsrt->run_cnt >= order); + for (i = 0; i < order; i++) + pop_heap (xsrt->initial_runs, xsrt->run_cnt--, + sizeof *xsrt->initial_runs, + compare_initial_runs, NULL); + + /* Merge runs. */ + if (!merge_once (&mrg, xsrt->initial_runs + xsrt->run_cnt, order, + &output_run)) + goto done; + + /* Add output run to heap. */ + xsrt->initial_runs[xsrt->run_cnt++] = output_run; + push_heap (xsrt->initial_runs, xsrt->run_cnt, sizeof *xsrt->initial_runs, + compare_initial_runs, NULL); + } - if (!heap_insert (huffman_queue, run_no++, total_run_length)) - { - msg (SE, _("Out of memory expanding Huffman priority queue.")); - return 0; - } + /* Exactly one run is left, which contains the entire sorted + file. We could use it to find a total case count. */ + assert (xsrt->run_cnt == 1); - rmax -= order - 1; - } + success = 1; - /* There should be exactly one element in the priority queue after - all that merging. This represents the entire sorted active file. - So we could find a total case count by deleting this element from - the queue. */ - assert (heap_size (huffman_queue) == 1); + done: + for (i = 0; i < mrg.case_cnt; i++) + free (mrg.cases[i]); + free (mrg.cases); - return 1; + return success; } -/* Merges N_RUNS initial runs into a new run. The jth run for 0 <= j - < N_RUNS is taken from temporary file RUN_INDEX[j]; it is composed - of RUN_LENGTH[j] cases. */ +/* Modulo function as defined by Knuth. */ static int -merge_once (int run_index[], int run_length[], int n_runs) +mod (int x, int y) { - /* For each run, the number of records remaining in its buffer. */ - int buffered[MAX_MERGE_ORDER]; + if (y == 0) + return x; + else if (x == 0) + return 0; + else if (x > 0 && y > 0) + return x % y; + else if (x < 0 && y > 0) + return y - (-x) % y; - /* For each run, the index of the next record in the buffer. */ - int buffer_ptr[MAX_MERGE_ORDER]; + assert (0); +} - /* Open input files. */ +/* A run of data for use in merging. */ +struct run { - int i; + FILE *file; /* File that contains run. */ + int file_idx; /* Index of file that contains run. */ + struct ccase **buffer; /* Case buffer. */ + struct ccase **buffer_head; /* First unconsumed case in buffer. */ + struct ccase **buffer_tail; /* One past last unconsumed case in buffer. */ + size_t buffer_cap; /* Number of cases buffer can hold. */ + size_t unread_case_cnt; /* Number of cases not yet read. */ + }; - for (i = 0; i < n_runs; i++) - if (run_index[i] != -2 && !open_handle_r (i, run_index[i])) - { - /* Close and remove temporary files. */ - while (i--) - { - close_handle (i); - sprintf (tmp_extname, "%08x", i); - if (remove (tmp_basename) != 0) - msg (SE, _("%s: Error removing temporary file: %s."), - tmp_basename, strerror (errno)); - } +/* Merges the RUN_CNT initial runs specified in INPUT_RUNS into a + new run. Returns nonzero only if successful. Adds an entry + to MRG->xsrt->runs for the output file if and only if the + output file is actually created. Always deletes all the input + files. */ +static int +merge_once (struct merge_state *mrg, + const struct initial_run input_runs[], + size_t run_cnt, + struct initial_run *output_run) +{ + struct run runs[MAX_MERGE_ORDER]; + FILE *output_file = NULL; + size_t case_size; + int success = 0; + int i; - return 0; - } - } + /* Initialize runs[]. */ + for (i = 0; i < run_cnt; i++) + { + runs[i].file = NULL; + runs[i].file_idx = input_runs[i].file_idx; + runs[i].buffer = mrg->cases + mrg->case_cnt / run_cnt * i; + runs[i].buffer_head = runs[i].buffer; + runs[i].buffer_tail = runs[i].buffer; + runs[i].buffer_cap = mrg->case_cnt / run_cnt; + runs[i].unread_case_cnt = input_runs[i].case_cnt; + } - /* Create output file. */ - if (!open_handle_w (N_INPUT_BUFFERS, run_no)) + /* Open input files. */ + for (i = 0; i < run_cnt; i++) + { + runs[i].file = open_temp_file (mrg->xsrt, runs[i].file_idx, "rb"); + if (runs[i].file == NULL) + goto error; + } + + /* Create output file and count cases to be output. */ + output_run->file_idx = mrg->xsrt->next_file_idx++; + output_run->case_cnt = 0; + for (i = 0; i < run_cnt; i++) + output_run->case_cnt += input_runs[i].case_cnt; + output_file = open_temp_file (mrg->xsrt, output_run->file_idx, "wb"); + if (output_file == NULL) + goto error; + + /* Prime buffers. */ + for (i = 0; i < run_cnt; i++) + if (!fill_run_buffer (mrg, runs + i)) + goto error; + + /* Merge. */ + case_size = dict_get_case_size (default_dict); + while (run_cnt > 0) { - msg (SE, _("%s: Error creating temporary file for merge: %s."), - tmp_basename, strerror (errno)); - goto lossage; + struct run *min_run; + + /* Find minimum. */ + min_run = runs; + for (i = 1; i < run_cnt; i++) + if (compare_record ((*runs[i].buffer_head)->data, + (*min_run->buffer_head)->data, + mrg->xsrt->scp) < 0) + min_run = runs + i; + + /* Write minimum to output file. */ + if (!write_temp_file (mrg->xsrt, min_run->file_idx, output_file, + (*min_run->buffer_head)->data, case_size)) + goto error; + + /* Remove case from buffer. */ + if (++min_run->buffer_head >= min_run->buffer_tail) + { + /* Buffer is empty. Fill from file. */ + if (!fill_run_buffer (mrg, min_run)) + goto error; + + /* If buffer is still empty, delete its run. */ + if (min_run->buffer_head >= min_run->buffer_tail) + { + close_temp_file (mrg->xsrt, min_run->file_idx, min_run->file); + remove_temp_file (mrg->xsrt, min_run->file_idx); + *min_run = runs[--run_cnt]; + + /* We could donate the now-unused buffer space to + other runs. */ + } + } } - /* Prime each buffer. */ - { - int i; + /* Close output file. */ + close_temp_file (mrg->xsrt, output_run->file_idx, output_file); - for (i = 0; i < n_runs; i++) - if (run_index[i] == -2) - { - n_runs = i; - break; - } - else - { - int j; - int ofs = records_per_buffer * i; - - buffered[i] = min (records_per_buffer, run_length[i]); - for (j = 0; j < buffered[i]; j++) - if (fread (x[j + ofs]->record, - dict_get_case_size (default_dict), 1, handle[i]) != 1) - { - sprintf (tmp_extname, "%08x", run_index[i]); - if (ferror (handle[i])) - msg (SE, _("%s: Error reading temporary file in merge: %s."), - tmp_basename, strerror (errno)); - else - msg (SE, _("%s: Unexpected end of temporary file in merge."), - tmp_basename); - goto lossage; - } - buffer_ptr[i] = ofs; - run_length[i] -= buffered[i]; - } - } + return 1; - /* Perform the merge proper. */ - while (n_runs) /* Loop while some data is left. */ + error: + /* Close and remove output file. */ + if (output_file != NULL) { - int i; - int min = 0; + close_temp_file (mrg->xsrt, output_run->file_idx, output_file); + remove_temp_file (mrg->xsrt, output_run->file_idx); + } + + /* Close and remove any remaining input runs. */ + for (i = 0; i < run_cnt; i++) + { + close_temp_file (mrg->xsrt, runs[i].file_idx, runs[i].file); + remove_temp_file (mrg->xsrt, runs[i].file_idx); + } - for (i = 1; i < n_runs; i++) - if (compare_record (x[buffer_ptr[min]]->record, - x[buffer_ptr[i]]->record) > 0) - min = i; + return success; +} - if (fwrite (x[buffer_ptr[min]]->record, - dict_get_case_size (default_dict), 1, - handle[N_INPUT_BUFFERS]) != 1) - { - sprintf (tmp_extname, "%08x", run_index[i]); - msg (SE, _("%s: Error writing temporary file in " - "merge: %s."), tmp_basename, strerror (errno)); - goto lossage; - } +/* Reads as many cases as possible into RUN's buffer. + Reads nonzero unless a disk error occurs. */ +static int +fill_run_buffer (struct merge_state *mrg, struct run *run) +{ + run->buffer_head = run->buffer_tail = run->buffer; + while (run->unread_case_cnt > 0 + && run->buffer_tail < run->buffer + run->buffer_cap) + { + if (!read_temp_file (mrg->xsrt, run->file_idx, run->file, + (*run->buffer_tail)->data, + dict_get_case_size (default_dict))) + return 0; - /* Remove one case from the buffer for this input file. */ - if (--buffered[min] == 0) - { - /* The input buffer is empty. Do any cases remain in the - initial run on disk? */ - if (run_length[min]) - { - /* Yes. Read them in. */ - - int j; - int ofs; - - /* Reset the buffer pointer. Note that we can't simply - set it to (i * records_per_buffer) since the run - order might have changed. */ - ofs = buffer_ptr[min] -= buffer_ptr[min] % records_per_buffer; - - buffered[min] = min (records_per_buffer, run_length[min]); - for (j = 0; j < buffered[min]; j++) - if (fread (x[j + ofs]->record, - dict_get_case_size (default_dict), 1, handle[min]) - != 1) - { - sprintf (tmp_extname, "%08x", run_index[min]); - if (ferror (handle[min])) - msg (SE, _("%s: Error reading temporary file in " - "merge: %s."), - tmp_basename, strerror (errno)); - else - msg (SE, _("%s: Unexpected end of temporary file " - "in merge."), - tmp_basename); - goto lossage; - } - run_length[min] -= buffered[min]; - } - else - { - /* No. Delete this run. */ - - /* Close the file. */ - FILE *f = handle[min]; - handle[min] = NULL; - sprintf (tmp_extname, "%08x", run_index[min]); - if (fclose (f) == EOF) - msg (SE, _("%s: Error closing temporary file in merge: " - "%s."), tmp_basename, strerror (errno)); - - /* Delete the file. */ - if (remove (tmp_basename) != 0) - msg (SE, _("%s: Error removing temporary file in merge: " - "%s."), tmp_basename, strerror (errno)); - - n_runs--; - if (min != n_runs) - { - /* Since this isn't the last run, we move the last - run into its spot to force all the runs to be - contiguous. */ - run_index[min] = run_index[n_runs]; - run_length[min] = run_length[n_runs]; - buffer_ptr[min] = buffer_ptr[n_runs]; - buffered[min] = buffered[n_runs]; - handle[min] = handle[n_runs]; - } - } - } - else - buffer_ptr[min]++; + run->unread_case_cnt--; + run->buffer_tail++; } - /* Close output file. */ - { - FILE *f = handle[N_INPUT_BUFFERS]; - handle[N_INPUT_BUFFERS] = NULL; - if (fclose (f) == EOF) - { - sprintf (tmp_extname, "%08x", run_no); - msg (SE, _("%s: Error closing temporary file in merge: " - "%s."), - tmp_basename, strerror (errno)); - return 0; - } - } - return 1; +} + +static void +sort_sink_destroy (struct case_sink *sink UNUSED) +{ + assert (0); +} -lossage: - /* Close all the input and output files. */ - { - int i; +static struct case_source * +sort_sink_make_source (struct case_sink *sink) +{ + struct initial_run_state *irs = sink->aux; - for (i = 0; i < n_runs; i++) - if (run_length[i] != 0) - { - close_handle (i); - sprintf (tmp_basename, "%08x", run_index[i]); - if (remove (tmp_basename) != 0) - msg (SE, _("%s: Error removing temporary file: %s."), - tmp_basename, strerror (errno)); - } - } - close_handle (N_INPUT_BUFFERS); - sprintf (tmp_basename, "%08x", run_no); - if (remove (tmp_basename) != 0) - msg (SE, _("%s: Error removing temporary file: %s."), - tmp_basename, strerror (errno)); - return 0; + return create_case_source (&sort_source_class, irs->xsrt->scp); } - -/* External sort input program. */ +const struct case_sink_class sort_sink_class = + { + "SORT CASES", + NULL, + sort_sink_write, + sort_sink_destroy, + sort_sink_make_source, + }; + /* Reads all the records from the source stream and passes them to write_case(). */ static void -sort_stream_read (write_case_func *write_case, write_case_data wc_data) +sort_source_read (struct case_source *source, + write_case_func *write_case, write_case_data wc_data) { - read_sort_output (write_case, wc_data); + struct sort_cases_pgm *scp = source->aux; + + read_sort_output (scp, write_case, wc_data); } +void read_internal_sort_output (struct internal_sort *isrt, + write_case_func *write_case, + write_case_data wc_data); +void read_external_sort_output (struct external_sort *xsrt, + write_case_func *write_case, + write_case_data wc_data); + /* Reads all the records from the output stream and passes them to the function provided, which must have an interface identical to write_case(). */ void -read_sort_output (write_case_func *write_case, write_case_data wc_data) +read_sort_output (struct sort_cases_pgm *scp, + write_case_func *write_case, write_case_data wc_data) { - int i; - FILE *f; - - if (separate_case_tab) + assert ((scp->isrt != NULL) + (scp->xsrt != NULL) <= 1); + if (scp->isrt != NULL) + read_internal_sort_output (scp->isrt, write_case, wc_data); + else if (scp->xsrt != NULL) + read_external_sort_output (scp->xsrt, write_case, wc_data); + else { - struct ccase *save_temp_case = temp_case; - struct case_list **p; - - for (p = separate_case_tab; *p; p++) - { - temp_case = &(*p)->c; - write_case (wc_data); - } - - free (separate_case_tab); - separate_case_tab = NULL; - - temp_case = save_temp_case; - } else { - sprintf (tmp_extname, "%08x", run_no - 1); - f = fopen (tmp_basename, "rb"); - if (!f) - { - msg (ME, _("%s: Cannot open sort result file: %s."), tmp_basename, - strerror (errno)); - err_failure (); - return; - } - - for (i = 0; i < vfm_source_info.ncases; i++) - { - if (!fread (temp_case, vfm_source_info.case_size, 1, f)) - { - if (ferror (f)) - msg (ME, _("%s: Error reading sort result file: %s."), - tmp_basename, strerror (errno)); - else - msg (ME, _("%s: Unexpected end of sort result file: %s."), - tmp_basename, strerror (errno)); - err_failure (); - break; - } - - if (!write_case (wc_data)) - break; - } - - if (fclose (f) == EOF) - msg (ME, _("%s: Error closing sort result file: %s."), tmp_basename, - strerror (errno)); - - if (remove (tmp_basename) != 0) - msg (ME, _("%s: Error removing sort result file: %s."), tmp_basename, - strerror (errno)); - else - rmdir_temp_dir (); + /* No results. Probably an external sort that failed. */ } } -#if 0 /* dead code */ -/* Alternate interface to sort_stream_write used for external sorting - when SEPARATE is true. */ -static int -write_separate (struct ccase *c) +void +read_internal_sort_output (struct internal_sort *isrt, + write_case_func *write_case, + write_case_data wc_data) { - assert (c == temp_case); + struct ccase *save_temp_case = temp_case; + struct case_list **p; - sort_stream_write (); - return 1; + for (p = isrt->results; *p; p++) + { + temp_case = &(*p)->c; + write_case (wc_data); + } + free (isrt->results); + + temp_case = save_temp_case; } -#endif -/* Performs one iteration of 5.4.1R steps R4, R5, R6, R7, R2, and - R3. */ -static void -sort_stream_write (void) +void +read_external_sort_output (struct external_sort *xsrt, + write_case_func *write_case, + write_case_data wc_data) { - struct repl_sel_tree *t; - - /* R4. */ - memcpy (q->record, temp_case->data, vfm_sink_info.case_size); - if (compare_record (q->record, lastkey) < 0) - if (++rq > rmax) - rmax = rq; + FILE *file; + int file_idx; + int i; - /* R5. */ - t = q->fe; + assert (xsrt->run_cnt == 1); + file_idx = xsrt->initial_runs[0].file_idx; - /* R6 and R7. */ - for (;;) + file = open_temp_file (xsrt, file_idx, "rb"); + if (file == NULL) { - /* R6. */ - if (t->rn < rq - || (t->rn == rq && compare_record (t->loser->record, q->record) < 0)) - { - struct repl_sel_tree *temp_tree; - int temp_int; - - temp_tree = t->loser; - t->loser = q; - q = temp_tree; - - temp_int = t->rn; - t->rn = rq; - rq = temp_int; - } - - /* R7. */ - if (t == x[1]) - break; - t = t->fi; + err_failure (); + return; } - /* R2. */ - if (rq != rc) + for (i = 0; i < vfm_source_info.ncases; i++) { - end_run (); - begin_run (); - assert (rq <= rmax); - rc = rq; - } + if (!read_temp_file (xsrt, file_idx, file, + temp_case, vfm_source_info.case_size)) + { + err_failure (); + break; + } - /* R3. */ - if (rq != 0) - { - output_record (q->record); - lastkey = x[x_max]->record; - memcpy (lastkey, q->record, vfm_sink_info.case_size); + if (!write_case (wc_data)) + break; } } -/* Switches mode from sink to source. */ static void -sort_stream_mode (void) +sort_source_destroy (struct case_source *source) { - /* If this is not done, then we get the following source/sink pairs: - source=memory/disk/DATA LIST/etc., sink=SORT; source=SORT, - sink=SORT; which is not good. */ - vfm_sink = NULL; + struct sort_cases_pgm *scp = source->aux; + + destroy_sort_cases_pgm (scp); } -struct case_stream sort_stream = +const struct case_source_class sort_source_class = { - NULL, - sort_stream_read, - sort_stream_write, - sort_stream_mode, - NULL, - NULL, - "SORT", + "SORT CASES", + sort_source_read, + sort_source_destroy, }; diff --git a/src/sort.h b/src/sort.h index 15a5b2db..928a5170 100644 --- a/src/sort.h +++ b/src/sort.h @@ -22,12 +22,31 @@ #include "vfm.h" -/* SORT CASES programmatic interface. */ -int sort_cases (int separate); -void read_sort_output (write_case_func *, write_case_data); +/* Sort direction. */ +enum sort_direction + { + SRT_ASCEND, /* A, B, C, ..., X, Y, Z. */ + SRT_DESCEND /* Z, Y, X, ..., C, B, A. */ + }; + +/* SORT CASES input program. */ +struct sort_cases_pgm + { + int ref_cnt; /* Reference count. */ + + struct variable **vars; /* Variables to sort. */ + enum sort_direction *dirs; /* Sort directions. */ + int var_cnt; /* Number of variables to sort. */ + + struct internal_sort *isrt; /* Internal sort output. */ + struct external_sort *xsrt; /* External sort output. */ + }; -/* Variables to sort. */ -extern struct variable **v_sort; -extern int nv_sort; +/* SORT CASES programmatic interface. */ +struct sort_cases_pgm *parse_sort (void); +int sort_cases (struct sort_cases_pgm *, int separate); +void read_sort_output (struct sort_cases_pgm *, + write_case_func *, write_case_data); +void destroy_sort_cases_pgm (struct sort_cases_pgm *); #endif /* !sort_h */ diff --git a/src/var.h b/src/var.h index 277bfde8..bf2e4d50 100644 --- a/src/var.h +++ b/src/var.h @@ -151,19 +151,6 @@ struct get_proc int fv, nv; /* First, # of values. */ }; -/* Sort order. */ -enum - { - SRT_ASCEND, /* A, B, C, ..., X, Y, Z. */ - SRT_DESCEND /* Z, Y, X, ..., C, B, A. */ - }; - -/* SORT CASES private data. */ -struct sort_cases_proc - { - int order; /* SRT_ASCEND or SRT_DESCEND. */ - }; - /* MEANS private data. */ struct means_proc { @@ -263,7 +250,6 @@ struct variable struct frequencies_proc frq; struct list_proc lst; struct means_proc mns; - struct sort_cases_proc srt; struct matrix_data_proc mxd; struct match_files_proc mtf; struct t_test_proc t_t; @@ -302,6 +288,13 @@ struct ccase { union value data[1]; }; + +/* Linked list of cases. */ +struct case_list + { + struct case_list *next; + struct ccase c; + }; /* Dictionary. */ diff --git a/src/vars-atr.c b/src/vars-atr.c index ee4d2302..00453767 100644 --- a/src/vars-atr.c +++ b/src/vars-atr.c @@ -55,9 +55,11 @@ discard_variables (void) n_lag = 0; - if (vfm_source) + if (vfm_source != NULL) { - vfm_source->destroy_source (); + if (vfm_source->class->destroy != NULL) + vfm_source->class->destroy (vfm_source); + free (vfm_source); vfm_source = NULL; } diff --git a/src/vfm.c b/src/vfm.c index 42e6b7cc..82af2000 100644 --- a/src/vfm.c +++ b/src/vfm.c @@ -33,6 +33,7 @@ #include "expr.h" #include "misc.h" #include "random.h" +#include "settings.h" #include "som.h" #include "str.h" #include "tab.h" @@ -42,10 +43,11 @@ /* Virtual File Manager (vfm): - vfm is used to process data files. It uses the model that data is - read from one stream (the data source), then written to another - (the data sink). The data source is then deleted and the data sink - becomes the data source for the next procedure. */ + vfm is used to process data files. It uses the model that + data is read from one stream (the data source), processed, + then written to another (the data sink). The data source is + then deleted and the data sink becomes the data source for the + next procedure. */ #include "debug-print.h" @@ -58,11 +60,11 @@ struct write_case_data void *aux; }; -/* This is used to read from the active file. */ -struct case_stream *vfm_source; +/* The current active file, from which cases are read. */ +struct case_source *vfm_source; -/* This is used to write to the replacement active file. */ -struct case_stream *vfm_sink; +/* The replacement active file, to which cases are written. */ +struct case_sink *vfm_sink; /* Information about the data source. */ struct stream_info vfm_source_info; @@ -82,9 +84,11 @@ int compaction_nval; `value's. */ struct ccase *compaction_case; -/* Within a session, when paging is turned on, it is never turned back - off. This policy might be too aggressive. */ -static int paging = 0; +/* Nonzero means that we've overflowed our allotted workspace. + After that happens once during a session, we always store the + active file on disk instead of in memory. (This policy may be + too aggressive.) */ +static int workspace_overflow = 0; /* Time at which vfm was last invoked. */ time_t last_vfm_invocation; @@ -157,7 +161,9 @@ procedure (void (*beginfunc) (void *), last_vfm_invocation = time (NULL); open_active_file (); - vfm_source->read (procedure_write_case, &procedure_write_data); + if (vfm_source != NULL) + vfm_source->class->read (vfm_source, + procedure_write_case, &procedure_write_data); close_active_file (&procedure_write_data); assert (--recursive_call == 0); @@ -197,9 +203,9 @@ process_active_file (void (*beginfunc) (void *), beginfunc (aux); /* There doesn't necessarily need to be an active file. */ - if (vfm_source) - vfm_source->read (process_active_file_write_case, - &process_active_write_data); + if (vfm_source != NULL) + vfm_source->class->read (vfm_source, process_active_file_write_case, + &process_active_write_data); endfunc (aux); close_active_file (&process_active_write_data); @@ -253,7 +259,7 @@ void process_active_file_output_case (void) { vfm_sink_info.ncases++; - vfm_sink->write (); + vfm_sink->class->write (vfm_sink, temp_case); } /* Opening the active file. */ @@ -285,19 +291,22 @@ prepare_for_writing (void) if (vfm_sink == NULL) { - if (vfm_sink_info.case_size * vfm_source_info.ncases > MAX_WORKSPACE - && !paging) + if (vfm_sink_info.case_size * vfm_source_info.ncases > set_max_workspace + && !workspace_overflow) { msg (MW, _("Workspace overflow predicted. Max workspace is " "currently set to %d KB (%d cases at %d bytes each). " - "Paging active file to disk."), - MAX_WORKSPACE / 1024, MAX_WORKSPACE / vfm_sink_info.case_size, + "Writing active file to disk."), + set_max_workspace / 1024, set_max_workspace / vfm_sink_info.case_size, vfm_sink_info.case_size); - paging = 1; + workspace_overflow = 1; } - - vfm_sink = paging ? &vfm_disk_stream : &vfm_memory_stream; + + if (workspace_overflow) + vfm_sink = create_case_sink (&disk_sink_class, NULL); + else + vfm_sink = create_case_sink (&memory_sink_class, NULL); } } @@ -333,8 +342,8 @@ arrange_compaction (void) else compaction_necessary = 0; - if (vfm_sink->init) - vfm_sink->init (); + if (vfm_sink->class->open != NULL) + vfm_sink->class->open (vfm_sink); } /* Prepares the temporary case and compaction case. */ @@ -511,18 +520,21 @@ close_active_file (struct write_case_data *data) finish_compaction (); /* Old data sink --> New data source. */ - if (vfm_source && vfm_source->destroy_source) - vfm_source->destroy_source (); - - vfm_source = vfm_sink; + if (vfm_source != NULL) + { + if (vfm_source->class->destroy != NULL) + vfm_source->class->destroy (vfm_source); + free (vfm_source); + } + + vfm_source = vfm_sink->class->make_source (vfm_sink); vfm_source_info.ncases = vfm_sink_info.ncases; vfm_source_info.nval = compaction_nval; vfm_source_info.case_size = (sizeof (struct ccase) + (compaction_nval - 1) * sizeof (union value)); - if (vfm_source->mode) - vfm_source->mode (); /* Old data sink is gone now. */ + free (vfm_sink); vfm_sink = NULL; /* Cancel TEMPORARY. */ @@ -557,16 +569,12 @@ close_active_file (struct write_case_data *data) /* Disk case stream. */ -/* Associated files. */ -FILE *disk_source_file; -FILE *disk_sink_file; - /* Initializes the disk sink. */ static void -disk_stream_init (void) +disk_sink_create (struct case_sink *sink) { - disk_sink_file = tmpfile (); - if (!disk_sink_file) + sink->aux = tmpfile (); + if (!sink->aux) { msg (ME, _("An error occurred attempting to create a temporary " "file for use as the active file: %s."), @@ -575,44 +583,21 @@ disk_stream_init (void) } } -/* Reads all cases from the disk source and passes them one by one to - write_case(). */ -static void -disk_stream_read (write_case_func *write_case, write_case_data wc_data) -{ - int i; - - for (i = 0; i < vfm_source_info.ncases; i++) - { - if (!fread (temp_case, vfm_source_info.case_size, 1, disk_source_file)) - { - msg (ME, _("An error occurred while attempting to read from " - "a temporary file created for the active file: %s."), - strerror (errno)); - err_failure (); - return; - } - - if (!write_case (wc_data)) - return; - } -} - /* Writes temp_case to the disk sink. */ static void -disk_stream_write (void) +disk_sink_write (struct case_sink *sink, struct ccase *c) { + FILE *file = sink->aux; union value *src_case; if (compaction_necessary) { - compact_case (compaction_case, temp_case); - src_case = (union value *) compaction_case; + compact_case (compaction_case, c); + src_case = compaction_case->data; } - else src_case = (union value *) temp_case; + else src_case = c->data; - if (fwrite (src_case, sizeof *src_case * compaction_nval, 1, - disk_sink_file) != 1) + if (fwrite (src_case, sizeof *src_case * compaction_nval, 1, file) != 1) { msg (ME, _("An error occurred while attempting to write to a " "temporary file used as the active file: %s."), @@ -621,12 +606,25 @@ disk_stream_write (void) } } -/* Switches the stream from a sink to a source. */ +/* Destroys the sink's internal data. */ static void -disk_stream_mode (void) +disk_sink_destroy (struct case_sink *sink) { - /* Rewind the sink. */ - if (fseek (disk_sink_file, 0, SEEK_SET) != 0) + FILE *file = sink->aux; + if (file != NULL) + fclose (file); +} + +/* Closes and destroys the sink and returns a disk source to read + back the written data. */ +static struct case_source * +disk_sink_make_source (struct case_sink *sink) +{ + FILE *file = sink->aux; + + /* Rewind the file. */ + assert (file != NULL); + if (fseek (file, 0, SEEK_SET) != 0) { msg (ME, _("An error occurred while attempting to rewind a " "temporary file used as the active file: %s."), @@ -634,107 +632,118 @@ disk_stream_mode (void) err_failure (); } - /* Sink --> source variables. */ - disk_source_file = disk_sink_file; + return create_case_source (&disk_source_class, file); } -/* Destroys the source's internal data. */ +/* Disk sink. */ +const struct case_sink_class disk_sink_class = + { + "disk", + disk_sink_create, + disk_sink_write, + disk_sink_destroy, + disk_sink_make_source, + }; + +/* Disk source. */ + +/* Reads all cases from the disk source and passes them one by one to + write_case(). */ static void -disk_stream_destroy_source (void) +disk_source_read (struct case_source *source, + write_case_func *write_case, write_case_data wc_data) { - if (disk_source_file) + FILE *file = source->aux; + int i; + + for (i = 0; i < vfm_source_info.ncases; i++) { - fclose (disk_source_file); - disk_source_file = NULL; + if (!fread (temp_case, vfm_source_info.case_size, 1, file)) + { + msg (ME, _("An error occurred while attempting to read from " + "a temporary file created for the active file: %s."), + strerror (errno)); + err_failure (); + return; + } + + if (!write_case (wc_data)) + return; } } -/* Destroys the sink's internal data. */ +/* Destroys the source's internal data. */ static void -disk_stream_destroy_sink (void) +disk_source_destroy (struct case_source *source) { - if (disk_sink_file) - { - fclose (disk_sink_file); - disk_sink_file = NULL; - } + FILE *file = source->aux; + if (file != NULL) + fclose (file); } -/* Disk stream. */ -struct case_stream vfm_disk_stream = +/* Disk source. */ +const struct case_source_class disk_source_class = { - disk_stream_init, - disk_stream_read, - disk_stream_write, - disk_stream_mode, - disk_stream_destroy_source, - disk_stream_destroy_sink, "disk", + disk_source_read, + disk_source_destroy, }; /* Memory case stream. */ -/* List of cases stored in the stream. */ -struct case_list *memory_source_cases; -struct case_list *memory_sink_cases; - -/* Current case. */ -struct case_list *memory_sink_iter; +/* Memory sink data. */ +struct memory_sink_info + { + int max_cases; /* Maximum cases before switching to disk. */ + struct case_list *head; /* First case in list. */ + struct case_list *tail; /* Last case in list. */ + }; -/* Maximum number of cases. */ -int memory_sink_max_cases; +/* Memory source data. */ +struct memory_source_info + { + struct case_list *cases; /* List of cases. */ + }; -/* Initializes the memory stream variables for writing. */ static void -memory_stream_init (void) +memory_sink_create (struct case_sink *sink) { - memory_sink_cases = NULL; - memory_sink_iter = NULL; + struct memory_sink_info *info; - assert (compaction_nval); - memory_sink_max_cases = MAX_WORKSPACE / (sizeof (union value) * compaction_nval); -} + sink->aux = info = xmalloc (sizeof *info); -/* Reads the case stream from memory and passes it to write_case(). */ -static void -memory_stream_read (write_case_func *write_case, write_case_data wc_data) -{ - while (memory_source_cases != NULL) - { - memcpy (temp_case, &memory_source_cases->c, vfm_source_info.case_size); - - { - struct case_list *current = memory_source_cases; - memory_source_cases = memory_source_cases->next; - free (current); - } - - if (!write_case (wc_data)) - return; - } + assert (compaction_nval > 0); + info->max_cases = set_max_workspace / (sizeof (union value) * compaction_nval); + info->head = info->tail = NULL; } -/* Writes temp_case to the memory stream. */ static void -memory_stream_write (void) +memory_sink_write (struct case_sink *sink, struct ccase *c) { - struct case_list *new_case = malloc (sizeof (struct case_list) - + ((compaction_nval - 1) - * sizeof (union value))); + struct memory_sink_info *info = sink->aux; + size_t case_size; + struct case_list *new_case; + + case_size = sizeof (struct case_list) + + ((compaction_nval - 1) * sizeof (union value)); + new_case = malloc (case_size); /* If we've got memory to spare then add it to the linked list. */ - if (vfm_sink_info.ncases <= memory_sink_max_cases && new_case != NULL) + if (vfm_sink_info.ncases <= info->max_cases && new_case != NULL) { - if (compaction_necessary) - compact_case (&new_case->c, temp_case); + /* Append case to linked list. */ + new_case->next = NULL; + if (info->head != NULL) + info->tail->next = new_case; else - memcpy (&new_case->c, temp_case, sizeof (union value) * compaction_nval); + info->head = new_case; + info->tail = new_case; - /* Append case to linked list. */ - if (memory_sink_cases) - memory_sink_iter = memory_sink_iter->next = new_case; + /* Copy data into case. */ + if (compaction_necessary) + compact_case (&new_case->c, c); else - memory_sink_iter = memory_sink_cases = new_case; + memcpy (&new_case->c, c, sizeof (union value) * compaction_nval); } else { @@ -743,36 +752,32 @@ memory_stream_write (void) /* Notify the user. */ if (!new_case) - msg (MW, _("Virtual memory exhausted. Paging active file " + msg (MW, _("Virtual memory exhausted. Writing active file " "to disk.")); else msg (MW, _("Workspace limit of %d KB (%d cases at %d bytes each) " - "overflowed. Paging active file to disk."), - MAX_WORKSPACE / 1024, memory_sink_max_cases, + "overflowed. Writing active file to disk."), + set_max_workspace / 1024, info->max_cases, compaction_nval * sizeof (union value)); free (new_case); /* Switch to a disk sink. */ - vfm_sink = &vfm_disk_stream; - vfm_sink->init (); - paging = 1; - - /* Terminate the list. */ - if (memory_sink_iter) - memory_sink_iter->next = NULL; + vfm_sink = create_case_sink (&disk_sink_class, NULL); + vfm_sink->class->open (vfm_sink); + workspace_overflow = 1; /* Write the cases to disk and destroy them. We can't call vfm->sink->write() because of compaction. */ - for (cur = memory_sink_cases; cur; cur = next) + for (cur = info->head; cur; cur = next) { next = cur->next; if (fwrite (cur->c.data, sizeof (union value) * compaction_nval, 1, - disk_sink_file) != 1) + vfm_sink->aux) != 1) { msg (ME, _("An error occurred while attempting to " "write to a temporary file created as the " - "active file, while paging to disk: %s."), + "active file: %s."), strerror (errno)); err_failure (); } @@ -780,36 +785,38 @@ memory_stream_write (void) } /* Write the current case to disk. */ - vfm_sink->write (); + vfm_sink->class->write (vfm_sink, c); } } /* If the data is stored in memory, causes it to be written to disk. To be called only *between* procedure()s, not within them. */ void -page_to_disk (void) +write_active_file_to_disk (void) { - if (vfm_source == &vfm_memory_stream) + if (case_source_is_class (vfm_source, &memory_source_class)) { + struct memory_source_info *info = vfm_source->aux; + /* Switch to a disk sink. */ - vfm_sink = &vfm_disk_stream; - vfm_sink->init (); - paging = 1; + vfm_sink = create_case_sink (&disk_sink_class, NULL); + vfm_sink->class->open (vfm_sink); + workspace_overflow = 1; /* Write the cases to disk and destroy them. We can't call vfm->sink->write() because of compaction. */ { struct case_list *cur, *next; - for (cur = memory_source_cases; cur; cur = next) + for (cur = info->cases; cur; cur = next) { next = cur->next; if (fwrite (cur->c.data, sizeof *cur->c.data * compaction_nval, 1, - disk_sink_file) != 1) + vfm_sink->aux) != 1) { msg (ME, _("An error occurred while attempting to " "write to a temporary file created as the " - "active file, while paging to disk: %s."), + "active file: %s."), strerror (errno)); err_failure (); } @@ -817,64 +824,107 @@ page_to_disk (void) } } - vfm_source = &vfm_disk_stream; - vfm_source->mode (); - + vfm_source = vfm_sink->class->make_source (vfm_sink); vfm_sink = NULL; } } -/* Switch the memory stream from sink to source mode. */ +/* Destroy all memory sink data. */ static void -memory_stream_mode (void) +memory_sink_destroy (struct case_sink *sink) { - /* Terminate the list. */ - if (memory_sink_iter) - memory_sink_iter->next = NULL; + struct memory_sink_info *info = sink->aux; + struct case_list *cur, *next; + + for (cur = info->head; cur; cur = next) + { + next = cur->next; + free (cur); + } + free (info); +} + +/* Switch the memory stream from sink to source mode. */ +static struct case_source * +memory_sink_make_source (struct case_sink *sink) +{ + struct memory_sink_info *sink_info = sink->aux; + struct memory_source_info *source_info; + + source_info = xmalloc (sizeof *source_info); + source_info->cases = sink_info->head; + + free (sink_info); - /* Sink --> source variables. */ - memory_source_cases = memory_sink_cases; - memory_sink_cases = NULL; + return create_case_source (&memory_source_class, source_info); } -/* Destroy all memory source data. */ +const struct case_sink_class memory_sink_class = + { + "memory", + memory_sink_create, + memory_sink_write, + memory_sink_destroy, + memory_sink_make_source, + }; + +/* Reads the case stream from memory and passes it to write_case(). */ static void -memory_stream_destroy_source (void) +memory_source_read (struct case_source *source, + write_case_func *write_case, write_case_data wc_data) { - struct case_list *cur, *next; - - for (cur = memory_source_cases; cur; cur = next) + struct memory_source_info *info = source->aux; + + while (info->cases != NULL) { - next = cur->next; - free (cur); + struct case_list *iter = info->cases; + info->cases = iter->next; + memcpy (temp_case, &iter->c, vfm_source_info.case_size); + free (iter); + + if (!write_case (wc_data)) + return; } - memory_source_cases = NULL; } -/* Destroy all memory sink data. */ +/* Destroy all memory source data. */ static void -memory_stream_destroy_sink (void) +memory_source_destroy (struct case_source *source) { + struct memory_source_info *info = source->aux; struct case_list *cur, *next; - for (cur = memory_sink_cases; cur; cur = next) + for (cur = info->cases; cur; cur = next) { next = cur->next; free (cur); } - memory_sink_cases = NULL; + free (info); } - + +struct case_list * +memory_source_get_cases (const struct case_source *source) +{ + struct memory_source_info *info = source->aux; + + return info->cases; +} + +void +memory_source_set_cases (const struct case_source *source, + struct case_list *cases) +{ + struct memory_source_info *info = source->aux; + + info->cases = cases; +} + /* Memory stream. */ -struct case_stream vfm_memory_stream = +const struct case_source_class memory_source_class = { - memory_stream_init, - memory_stream_read, - memory_stream_write, - memory_stream_mode, - memory_stream_destroy_source, - memory_stream_destroy_sink, "memory", + memory_source_read, + memory_source_destroy, }; #include "debug-print.h" @@ -935,7 +985,7 @@ procedure_write_case (write_case_data wc_data) lag_case (); vfm_sink_info.ncases++; - vfm_sink->write (); + vfm_sink->class->write (vfm_sink, temp_case); if (dict_get_case_limit (default_dict)) more_cases = (vfm_sink_info.ncases @@ -1233,4 +1283,36 @@ finish_compaction (void) dict_compact_values (default_dict); } - +struct case_source * +create_case_source (const struct case_source_class *class, void *aux) +{ + struct case_source *source = xmalloc (sizeof *source); + source->class = class; + source->aux = aux; + return source; +} + +int +case_source_is_complex (const struct case_source *source) +{ + return source != NULL && (source->class == &input_program_source_class + || source->class == &file_type_source_class); +} + +int +case_source_is_class (const struct case_source *source, + const struct case_source_class *class) +{ + return source != NULL && source->class == class; + +} + +struct case_sink * +create_case_sink (const struct case_sink_class *class, void *aux) +{ + struct case_sink *sink = xmalloc (sizeof *sink); + sink->class = class; + sink->aux = aux; + return sink; +} + diff --git a/src/vfm.h b/src/vfm.h index a9355636..f50b45ee 100644 --- a/src/vfm.h +++ b/src/vfm.h @@ -28,61 +28,86 @@ extern time_t last_vfm_invocation; /* This is the case that is to be filled in by input programs. */ extern struct ccase *temp_case; -/* `value' indexes to initialize to particular values for certain cases. */ -extern struct long_vec reinit_sysmis; /* SYSMIS for every case. */ -extern struct long_vec reinit_blanks; /* Blanks for every case. */ -extern struct long_vec init_zero; /* Zero for first case only. */ -extern struct long_vec init_blanks; /* Blanks for first case only. */ - typedef struct write_case_data *write_case_data; typedef int write_case_func (write_case_data); + +/* The current active file, from which cases are read. */ +extern struct case_source *vfm_source; + +/* A case source. */ +struct case_source + { + const struct case_source_class *class; /* Class. */ + void *aux; /* Auxiliary data. */ + }; -/* A case stream: either a source or a sink, depending on context. */ -struct case_stream +/* A case source class. */ +struct case_source_class { - /* Initializes sink. */ - void (*init) (void); + const char *name; /* Identifying name. */ /* Reads all the cases and calls WRITE_CASE passing the given AUX data for each one. */ - void (*read) (write_case_func *, write_case_data); - - /* Writes a single case, temp_case. */ - void (*write) (void); - - /* Switches mode from sink to source. */ - void (*mode) (void); - - /* Discards source's internal data. */ - void (*destroy_source) (void); + void (*read) (struct case_source *, write_case_func *, write_case_data); - /* Discards sink's internal data. */ - void (*destroy_sink) (void); - - /* Identifying name for the stream. */ - const char *name; + /* Destroys the source. */ + void (*destroy) (struct case_source *); }; -/* This is used to read from the active file. */ -extern struct case_stream *vfm_source; +extern const struct case_source_class memory_source_class; +extern const struct case_source_class disk_source_class; +extern const struct case_source_class data_list_source_class; +extern const struct case_source_class file_type_source_class; +extern const struct case_source_class input_program_source_class; +extern const struct case_source_class get_source_class; +extern const struct case_source_class import_source_class; +extern const struct case_source_class sort_source_class; + +struct case_source *create_case_source (const struct case_source_class *, + void *); +int case_source_is_complex (const struct case_source *); +int case_source_is_class (const struct case_source *, + const struct case_source_class *); +struct case_list *memory_source_get_cases (const struct case_source *); +void memory_source_set_cases (const struct case_source *, + struct case_list *); + +/* The replacement active file, to which cases are written. */ +extern struct case_sink *vfm_sink; + +/* A case sink. */ +struct case_sink + { + const struct case_sink_class *class; /* Class. */ + void *aux; /* Auxiliary data. */ + }; -/* This is used to write to the replacement active file. */ -extern struct case_stream *vfm_sink; +/* A case sink class. */ +struct case_sink_class + { + const char *name; /* Identifying name. */ + + /* Creates the sink and opens it for writing. */ + void (*open) (struct case_sink *); + + /* Writes a case to the sink. */ + void (*write) (struct case_sink *, struct ccase *); + + /* Closes and destroys the sink. */ + void (*destroy) (struct case_sink *); -/* General data streams. */ -extern struct case_stream vfm_memory_stream; -extern struct case_stream vfm_disk_stream; -extern struct case_stream sort_stream; -extern struct case_stream flip_stream; + /* Closes and destroys the sink and returns a source that can + read back the cases that were written, perhaps transformed + in some way. */ + struct case_source *(*make_source) (struct case_sink *); + }; -/* Streams that are only sources. */ -extern struct case_stream data_list_source; -extern struct case_stream input_program_source; -extern struct case_stream file_type_source; -extern struct case_stream get_source; -extern struct case_stream import_source; -extern struct case_stream matrix_data_source; +extern const struct case_sink_class memory_sink_class; +extern const struct case_sink_class disk_sink_class; +extern const struct case_sink_class sort_sink_class; +struct case_sink *create_case_sink (const struct case_sink_class *, void *); + /* Number of cases to lag. */ extern int n_lag; @@ -92,7 +117,7 @@ void procedure (void (*beginfunc) (void *aux), void *aux); struct ccase *lagged_case (int n_before); void compact_case (struct ccase *dest, const struct ccase *src); -void page_to_disk (void); +void write_active_file_to_disk (void); void process_active_file (void (*beginfunc) (void *), int (*casefunc) (struct ccase *curcase, void *), diff --git a/src/vfmP.h b/src/vfmP.h index 6454da89..eb52f0ed 100644 --- a/src/vfmP.h +++ b/src/vfmP.h @@ -22,13 +22,6 @@ #include "var.h" -/* Linked list of cases. */ -struct case_list - { - struct case_list *next; - struct ccase c; - }; - /* Describes a data stream, either a source or a sink. */ struct stream_info { @@ -43,18 +36,6 @@ extern struct stream_info vfm_source_info; /* Information about the data sink. */ extern struct stream_info vfm_sink_info; -/* Memory case stream. */ - -/* List of cases stored in the stream. */ -extern struct case_list *memory_source_cases; -extern struct case_list *memory_sink_cases; - -/* Current case. */ -extern struct case_list *memory_sink_iter; - -/* Maximum number of cases. */ -extern int memory_sink_max_cases; - /* Nonzero if the case needs to have values deleted before being stored, zero otherwise. */ extern int compaction_necessary; diff --git a/tests/command/print.sh b/tests/command/print.sh index fdfcf9cb..830e9858 100755 --- a/tests/command/print.sh +++ b/tests/command/print.sh @@ -112,7 +112,7 @@ if [ $? -ne 0 ] ; then fail ; fi activity="compare output" -diff -b -B $TEMPDIR/pspp.list - << EOF +diff -u -b -B $TEMPDIR/pspp.list - << EOF ---------------------------------------------------------------------- There is no test for DATA LIST FIXED since it is imagined that the rest of the tests give it a pretty good workout. diff --git a/tests/command/sort.sh b/tests/command/sort.sh index c53c1972..945fa5c9 100755 --- a/tests/command/sort.sh +++ b/tests/command/sort.sh @@ -50,7 +50,7 @@ activity="generate stat program" cat > $TEMPDIR/sort.stat < $TEMPDIR/sortsort +cp $TEMPDIR/pspp.list ~/pspp.list if [ $? -ne 0 ] ; then no_result ; fi -diff -B -b $TEMPDIR/sortsort $TEMPDIR/pspp.list +diff -u -B -b $TEMPDIR/sortsort $TEMPDIR/pspp.list if [ $? -ne 0 ] ; then fail ; fi # 2. It should be six elements wide -- 2.30.2