+Fri Dec 19 15:08:38 2003 Ben Pfaff <blp@gnu.org>
+
+ Get rid of AVL trees. Hashes are more appropriate for everything
+ PSPP does.
+
+ * Makefile.am: (pspp_SOURCES) Remove avl.c, avl.h.
+
+ * avl.c: Removed.
+
+ * avl.h: Removed.
+
+Fri Dec 19 14:33:31 2003 Ben Pfaff <blp@gnu.org>
+
+ Much code can be clarified by using C++ STL-like algorithms. Not
+ all uses of these algorithms are listed below, only the ones where
+ the change to an algorithm was the only change of interest.
+
+ * Makefile.am: (pspp_SOURCES) Add algorithm.c, algorithm.h.
+
+ * algorithm.c: New file.
+
+ * algorithm.h: New file.
+
+ * modify-vars.c: (static var forward) Removed.
+ (static var positional) Removed.
+ (compare_variables) Removed.
+ (struct ordering) New.
+ (cmd_modify_vars) Use sort() algorithm.
+ (compare_variables_given_ordering) New function.
+ (rearrange_dict) Use sort() algorithm.
+
+ * sysfile-info.c: (cmd_display) Use sort() algorithm.
+ (cmp_var_by_name) Removed.
+
+Fri Dec 19 14:26:17 2003 Ben Pfaff <blp@gnu.org>
+
+ Make file handles use a hash table.
+
+ * file-handle.q: (files) Change to hash table, make static.
+ (cmd_file_handle) Use hash table functions.
+ (fh_get_handle_by_filename) Ditto.
+ (fh_get_handle_by_name) Ditto.
+ (hash_file_handle) New function.
+ (cmp_file_handle) Rewrite.
+ (fh_init_files) Use hash table functions.
+
+Fri Dec 19 14:24:38 2003 Ben Pfaff <blp@gnu.org>
+
+ Clean up FREQUENCIES.
+
+ * Makefile.am: (pspp_SOURCES) Remove frequencies.g.
+
+ * frequencies.q: Remove a lot of old #if'd out code at the end.
+ (internal_cmd_frequencies) Use calc() instead of calc_no_weight()
+ or calc_weight(). Initialize percentile_values.
+ (calc) New function based on calc_weight() from frequencies.g.
+ (precalc) Use hash functions.
+ (static var comparison_func) Removed.
+ (static var comparison_param) Removed.
+ (comparison_helper) Removed.
+ (get_freq_comparator) New function.
+ (not_missing) New function.
+ (add_freq) Removed.
+ (postprocess_freq_tab) Use hash table functions, algorithms,
+ get_freq_comparator(). Rewrite.
+ (cleanup_freq_tab) Rephrase.
+ (add_percentile) Clean up spacing.
+ (hash_value_numeric) New function.
+ (hash_value_alpha) New function.
+ (compare_value_numeric_a) Rewrite.
+ (compare_value_alpha_a) Rewrite.
+ (compare_value_numeric_d) Rewrite.
+ (compare_value_alpha_d) Rewrite.
+ (compare_freq_numeric_a) Rewrite.
+ (compare_freq_alpha_a) Rewrite.
+ (compare_freq_numeric_d) Rewrite.
+ (compare_freq_alpha_d) Rewrite.
+ (calc_stats) Clean up mode, percentiles, max.
+ (dump_statistics) Clean up spacing.
+
+ * frequencies.g: Removed.
+
+ * var.h: (struct freq_tab) Change `data' to hash table.
+
+Fri Dec 19 14:15:46 2003 Ben Pfaff <blp@gnu.org>
+
+ * file-handle.h: Remove declaration of global variable `files',
+ which wasn't used anywhere.
+
+ * postscript.c: (add_encoding) Remove superfluous cast.
+ (line) Ditto.
+
+ * sfm-read.c: [linux] (bswap_int32) Drop ntohl() non-portable
+ version.
+
+ * temporary.c: [0] (display_tree) Removed.
+
+Fri Dec 19 14:13:04 2003 Ben Pfaff <blp@gnu.org>
+
+ Implement a new random number generator based on the alleged RC4
+ algorithm.
+
+ * expr-evl.c: (expr_evaluate) Use rng_get_double_normal() instead
+ of rand_normal().
+
+ * random.c: [!HAVE_GOOD_RANDOM] (real_rand) Removed.
+ [!HAVE_GOOD_RANDOM] (real_srand) Removed.
+ (macro k) Removed.
+ (static var V[]) Removed.
+ (static var Y) Removed.
+ (static var X2) Removed.
+ (setup_randomize) Removed.
+ (shuffle) Removed.
+ (rand_uniform) Removed.
+ (rand_normal) Removed.
+ (struct rng) New structure.
+ (rng_create) New function.
+ (rng_destroy) New function.
+ (swap_byte) New static function.
+ (rng_seed) New function.
+ (rng_get_bytes) New function.
+ (rng_get_int) New function.
+ (rng_get_unsigned) New function.
+ (rng_get_double) New function.
+ (rng_get_double_normal) New function.
+ (pspp_rng) New function.
+
+ * random.h: Sync up to random.c.
+
+ * sample.c: (struct sample_trns) Make `frac' unsigned and a
+ fraction of UINT_MAX, not 65536.
+ (cmd_sample) Use rng_get_unsigned(), rng_get_double(), UINT_MAX
+ fraction.
+
+ * vfm.c: (open_active_file) No need to call setup_randomize() any
+ longer.
+
+Fri Dec 19 12:05:56 2003 Ben Pfaff <blp@gnu.org>
+
+ Change dictionary name indexes to use hash tables instead of AVL
+ trees.
+
+ * crosstabs.q: (free_var_dict) Use hash tables.
+ (crs_custom_tables) Ditto.
+ (calc_general) Ditto.
+ (compare_table_entry) Rewrite.
+ (enum_var_values) Reorder parameters. All references updated.
+ Rewrite.
+
+ * get.c: (rename_variable) Use hash tables.
+ (mtf_merge_dictionary) Ditto.
+
+ * glob.c: (init_glob) Use hash tables.
+ (cmp_variable) Removed.
+
+ * means.q: (mns_custom_tables) Use hash tables.
+
+ * modify-vars.c: (rearrange_dict) Use hash tables.
+
+ * rename-vars.c: (cmd_rename_variables) Use hash tables.
+
+ * sfm-read.c: (read_header) Use hash tables.
+ (read_variables) Ditto.
+
+ * temporary.c: (new_dictionary) Use hash tables.
+ (save_dictionary) Ditto.
+ (restore_dictionary) Ditto.
+
+ * var.h: (struct dictionary) Change AVL tree `var_by_name' into
+ hash table `name_tab'.
+
+ * vars-atr.c: [DEBUGGING] (dump_one_var_node) Removed.
+ [DEBUGGING] (dump_var_tree) Removed.
+ (find_variable) Use hash tables.
+ (find_dict_variable) Ditto.
+ (common_init_stuff) Ditto.
+ (rename_variable) Ditto.
+ (clear_variable) Ditto. Also, remove debug code.
+ (dup_variable) Use hash tables.
+
+ * vars-prs.c: (fill_all_vars) Use hash tables.
+ (is_dict_varname) Ditto.
+ (parse_dict_variable) Ditto.
+
+Fri Dec 19 11:46:23 2003 Ben Pfaff <blp@gnu.org>
+
+ Change value labels to use hash tables instead of AVL trees, and
+ change value labels into an ADT.
+
+ * Makefile.am: (pspp_SOURCES) Add value-labels.c, value-labels.h.
+
+ * value-labels.c: New file.
+
+ * value-labels.h: New file.
+
+ * apply-dict.c: (cmd_apply_dictionary) Use value label ADT.
+ Get rid of a stupid use of goto.
+
+ * autorecode.c: (compare_alpha_value) Rewrite.
+ (hash_alpha_value) Ditto.
+ (compare_numeric_value) Rewrite.
+ (hash_numeric_value) Ditto.
+
+ * frequencies.q: (dump_full) Use value label ADT.
+
+ * pfm-read.c: (read_value_label) Use value label ADT.
+
+ * pfm-write.c: (write_value_labels) Use value label ADT.
+
+ * sfm-read.c: (read_variables) Use value label ADT.
+ (read_value_labels) Rewrite.
+
+ * sfm-write.c: (write_value_labels) Rewrite.
+
+ * sysfile-info.c: (cmd_sysfile_info) Use value label ADT.
+ (display_variables) Ditto.
+ (describe_variable) Ditto.
+
+ * t-test.q: (print_t_groups) Use value label ADT.
+
+ * temporary.c: (copy_variable) Use value label ADT.
+ (free_dictionary) Ditto.
+
+ * val-labs.c: (verify_val_labs) Use value label ADT.
+ (get_label) Ditto.
+ (debug_print) Ditto.
+ (val_lab_cmp) Removed.
+ (inc_ref_count) Removed.
+ (copy_value_labels) Removed.
+
+ * var.h: (struct value_label) Removed.
+ (struct variable) Change AVL tree `val_lab' into hash table
+ `val_labs'.
+
+ * vars-atr.c: (init_variable) Use value label ADT.
+ (clear_variable) Ditto.
+ (free_value_label) Removed.
+ (free_val_lab) Removed.
+ (get_val_lab) Removed.
+ (compare_variables) New function.
+ (hash_variable) New function.
+
+ * vfm.c: (dump_splits) Use value label ADT.
+
+Fri Dec 19 11:18:11 2003 Ben Pfaff <blp@gnu.org>
+
+ Add to the hash table interface.
+
+ * hash.c: (hsh_hash_bytes) Add assertion.
+ (hsh_hash_string) Ditto.
+ (hsh_clear) Ditto.
+ (hsh_rehash) Ditto.
+ (hsh_probe) Ditto.
+ (hsh_create) Ditto. Also make minimum `size'.
+ (hsh_destroy) Rephrase.
+ (sort_nulls_last) Removed.
+ (not_null) New function.
+ (hsh_data) Ditto.
+ (comparison_helper) Ditto.
+ (hsh_sort) Rewritten.
+ (hsh_data_copy) New function.
+ (hsh_sort_copy) Ditto.
+ (hsh_insert) Ditto.
+ (hsh_replace) Ditto.
+ (hsh_hash_double) Ditto.
+ (hsh_delete) Fix stupid bug.
+
Thu Dec 18 12:27:03 WAST 2003 John Darrington <john@darrington.wattle.id.au>
* added a calculation of the mode to FREQUENCIES
q_sources_q = correlations.q crosstabs.q descript.q file-handle.q \
frequencies.q list.q means.q set.q t-test.q
-pspp_SOURCES = aggregate.c alloc.c alloc.h apply-dict.c approx.h \
-ascii.c autorecode.c avl.c avl.h bitvector.h cases.c cases.h cmdline.c \
-command.c command.def command.h compute.c correlations.c count.c \
-crosstabs.c data-in.c data-in.h data-list.c data-out.c debug-print.h \
-descript.c dfm.c dfm.h do-if.c do-ifP.h error.c error.h expr-evl.c \
-expr-opt.c expr-prs.c expr.h exprP.h file-handle.c file-handle.h \
-file-type.c filename.c filename.h flip.c font.h format.c format.def \
-format.h formats.c frequencies.c frequencies.g get.c getline.c \
-getline.h glob.c groff-font.c hash.c hash.h heap.c heap.h html.c \
-htmlP.h include.c inpt-pgm.c inpt-pgm.h lexer.c lexer.h list.c log.h \
-loop.c magic.c magic.h main.c main.h matrix-data.c matrix.c matrix.h \
-means.c mis-val.c misc.c misc.h modify-vars.c numeric.c output.c \
-output.h pfm-read.c pfm-write.c pfm.h pool.c pool.h postscript.c \
-print.c quicksort.c quicksort.h random.c random.h recode.c \
-rename-vars.c repeat.c sample.c sel-if.c set.c settings.h sfm-read.c \
-sfm-write.c sfm.h sfmP.h som.c som.h sort.c sort.h split-file.c stat.h \
-stats.c stats.h str.c str.h sysfile-info.c tab.c tab.h temporary.c \
-title.c t-test.c val-labs.c var-labs.c var.h vars-atr.c vars-prs.c \
+pspp_SOURCES = aggregate.c algorithm.c algorithm.h alloc.c alloc.h \
+apply-dict.c approx.h ascii.c autorecode.c bitvector.h cases.c cases.h \
+cmdline.c command.c command.def command.h compute.c correlations.c \
+count.c crosstabs.c data-in.c data-in.h data-list.c data-out.c \
+debug-print.h descript.c dfm.c dfm.h do-if.c do-ifP.h error.c error.h \
+expr-evl.c expr-opt.c expr-prs.c expr.h exprP.h file-handle.c \
+file-handle.h file-type.c filename.c filename.h flip.c font.h format.c \
+format.def format.h formats.c frequencies.c get.c getline.c getline.h \
+glob.c groff-font.c hash.c hash.h heap.c heap.h html.c htmlP.h \
+include.c inpt-pgm.c inpt-pgm.h lexer.c lexer.h list.c log.h loop.c \
+magic.c magic.h main.c main.h matrix-data.c matrix.c matrix.h means.c \
+mis-val.c misc.c misc.h modify-vars.c numeric.c output.c output.h \
+pfm-read.c pfm-write.c pfm.h pool.c pool.h postscript.c print.c \
+random.c random.h recode.c rename-vars.c repeat.c sample.c sel-if.c \
+set.c settings.h sfm-read.c sfm-write.c sfm.h sfmP.h som.c som.h \
+sort.c sort.h split-file.c stat.h stats.c stats.h str.c str.h \
+sysfile-info.c tab.c tab.h temporary.c title.c t-test.c val-labs.c \
+value-labels.c value-labels.h var-labs.c var.h vars-atr.c vars-prs.c \
vector.c vector.h version.c version.h vfm.c vfm.h vfmP.h weight.c
pspp_LDADD = ../lib/julcal/libjulcal.a \
--- /dev/null
+/* PSPP - computes sample statistics.
+ Copyright (C) 1997-9, 2000 Free Software Foundation, Inc.
+ Written by Ben Pfaff <blp@gnu.org>.
+
+ This program is free software; you can redistribute it and/or
+ modify it under the terms of the GNU General Public License as
+ published by the Free Software Foundation; either version 2 of the
+ License, or (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful, but
+ WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program; if not, write to the Free Software
+ Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA
+ 02111-1307, USA. */
+
+/* Copyright (C) 2001 Free Software Foundation, Inc.
+
+ This file is part of the GNU ISO C++ Library. This library is free
+ software; you can redistribute it and/or modify it under the
+ terms of the GNU General Public License as published by the
+ Free Software Foundation; either version 2, or (at your option)
+ any later version.
+
+ This library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License along
+ with this library; see the file COPYING. If not, write to the Free
+ Software Foundation, 59 Temple Place - Suite 330, Boston, MA 02111-1307,
+ USA.
+
+ As a special exception, you may use this file as part of a free software
+ library without restriction. Specifically, if other files instantiate
+ templates or use macros or inline functions from this file, or you compile
+ this file and link it with other files to produce an executable, this
+ file does not by itself cause the resulting executable to be covered by
+ the GNU General Public License. This exception does not however
+ invalidate any other reasons why the executable file might be covered by
+ the GNU General Public License. */
+
+/*
+ *
+ * Copyright (c) 1994
+ * Hewlett-Packard Company
+ *
+ * Permission to use, copy, modify, distribute and sell this software
+ * and its documentation for any purpose is hereby granted without fee,
+ * provided that the above copyright notice appear in all copies and
+ * that both that copyright notice and this permission notice appear
+ * in supporting documentation. Hewlett-Packard Company makes no
+ * representations about the suitability of this software for any
+ * purpose. It is provided "as is" without express or implied warranty.
+ *
+ *
+ * Copyright (c) 1996
+ * Silicon Graphics Computer Systems, Inc.
+ *
+ * Permission to use, copy, modify, distribute and sell this software
+ * and its documentation for any purpose is hereby granted without fee,
+ * provided that the above copyright notice appear in all copies and
+ * that both that copyright notice and this permission notice appear
+ * in supporting documentation. Silicon Graphics makes no
+ * representations about the suitability of this software for any
+ * purpose. It is provided "as is" without express or implied warranty.
+ */
+
+#include <config.h>
+#include <stdlib.h>
+#include <string.h>
+#include "alloc.h"
+#include "algorithm.h"
+#include "random.h"
+\f
+/* Byte-wise swap two items of size SIZE.  A and B must each point
+   to at least SIZE bytes.  Each argument is evaluated exactly
+   once.  A SIZE of zero swaps nothing.  The local names avoid the
+   `__' prefix, which is reserved for the implementation. */
+#define SWAP(a, b, size)                        \
+  do                                            \
+    {                                           \
+      size_t swap_left_ = (size);               \
+      char *swap_a_ = (a), *swap_b_ = (b);      \
+      while (swap_left_-- > 0)                  \
+        {                                       \
+          char swap_tmp_ = *swap_a_;            \
+          *swap_a_++ = *swap_b_;                \
+          *swap_b_++ = swap_tmp_;               \
+        }                                       \
+    } while (0)
+
+/* Removes adjacent duplicates from ARRAY, which contains COUNT
+   elements of SIZE bytes each, by moving each retained element
+   down next to the previously retained one.  Two elements are
+   duplicates when COMPARE, which is passed AUX as auxiliary data,
+   returns zero for them, so ARRAY should already be sorted.
+   Returns the number of elements retained. */
+size_t
+unique (void *array, size_t count, size_t size,
+        algo_compare_func *compare, void *aux)
+{
+  char *const end = (char *) array + size * count;
+  char *kept = array;
+  char *scan;
+
+  for (scan = kept + size; scan < end; scan += size)
+    {
+      if (compare (kept, scan, aux) == 0)
+        {
+          /* SCAN duplicates the last retained element: drop it. */
+          count--;
+          continue;
+        }
+
+      /* SCAN starts a new run: retain it just after KEPT. */
+      kept += size;
+      if (kept != scan)
+        memcpy (kept, scan, size);
+    }
+
+  return count;
+}
+
+/* Sorts ARRAY with sort(), then strips duplicates with unique().
+   All arguments are passed through unchanged to both.  Returns
+   the element count reported by unique(). */
+size_t
+sort_unique (void *array, size_t count, size_t size,
+             algo_compare_func *compare, void *aux)
+{
+  size_t unique_cnt;
+
+  sort (array, count, size, compare, aux);
+  unique_cnt = unique (array, count, size, compare, aux);
+  return unique_cnt;
+}
+
+#ifdef TEST_UNIQUE
+#include <stdio.h>
+
+/* Test-only stand-in for xmalloc(): a plain malloc() of SIZE
+   bytes, with no out-of-memory handling. */
+void *
+xmalloc (size_t size)
+{
+  void *block = malloc (size);
+  return block;
+}
+
+/* Compares the ints pointed to by A_ and B_ and returns a
+   strcmp()-type result: negative if *A_ < *B_, zero if they are
+   equal, positive if *A_ > *B_.  AUX is ignored. */
+int
+compare_ints (const void *a_, const void *b_, void *aux)
+{
+  const int *a = a_;
+  const int *b = b_;
+
+  (void) aux;
+
+  if (*a > *b)
+    return 1;
+  else if (*a < *b)
+    return -1;
+  else
+    return 0;
+}
+
+/* Runs unique() over the IN_CNT ints in IN.  Reports on stderr,
+   labelled with TITLE, if the resulting count differs from
+   OUT_CNT or if the retained elements are not exactly
+   0, 1, ..., OUT_CNT - 1. */
+void
+try_unique (const char *title,
+            int *in, size_t in_cnt,
+            size_t out_cnt)
+{
+  size_t i;
+
+  in_cnt = unique (in, in_cnt, sizeof *in, compare_ints, NULL);
+  if (in_cnt != out_cnt)
+    {
+      fprintf (stderr, "unique_test: %s: in_cnt %d, expected %d\n",
+               title, (int) in_cnt, (int) out_cnt);
+      return;
+    }
+
+  for (i = 0; i < out_cnt; i++)
+    {
+      /* Compare as ints, and cast I for printing: passing a size_t
+         where %d expects an int is undefined behavior. */
+      if (in[i] != (int) i)
+        fprintf (stderr, "unique_test: %s: idx %d = %d, expected %d\n",
+                 title, (int) i, in[i], (int) i);
+    }
+}
+
+/* Test driver: checks unique() on arrays with leading, interior,
+   and trailing runs of duplicates, on a single element, and on an
+   empty array.  Failures are reported on stderr by try_unique(). */
+int
+main (void)
+{
+  int a_in[] = {0, 0, 0, 1, 2, 3, 3, 4, 5, 5};
+  int b_in[] = {0, 1, 2, 2, 2, 3};
+  int c_in[] = {0};
+  int d_in = 0;                 /* Never read: passed with a count of 0. */
+
+  try_unique ("a", a_in, sizeof a_in / sizeof *a_in, 6);
+  try_unique ("b", b_in, sizeof b_in / sizeof *b_in, 4);
+  try_unique ("c", c_in, sizeof c_in / sizeof *c_in, 1);
+  try_unique ("d", &d_in, 0, 0);
+
+  return 0;
+}
+#endif /* TEST_UNIQUE */
+\f
+/* Reorders ARRAY, which contains COUNT elements of SIZE bytes
+ each, so that the elements for which PREDICATE returns nonzero
+ precede those for which PREDICATE returns zero. AUX is
+ passed to each predicate as auxiliary data. Returns the
+ number of elements for which PREDICATE returns nonzero. Not
+ stable.
+
+ COUNT starts as the total number of elements and is decremented
+ once for each element found to fail PREDICATE, so that when the
+ two scans meet it holds the number of passing elements. */
+size_t
+partition (void *array, size_t count, size_t size,
+ algo_predicate_func *predicate, void *aux)
+{
+ char *first = array;
+ char *last = first + count * size;
+
+ for (;;)
+ {
+ /* Move FIRST forward to point to first element that fails
+ PREDICATE. */
+ for (;;)
+ {
+ if (first == last)
+ return count;
+ else if (!predicate (first, aux))
+ break;
+
+ first += size;
+ }
+ /* The element at FIRST failed PREDICATE: uncount it. */
+ count--;
+
+ /* Move LAST backward to point to last element that passes
+ PREDICATE. */
+ for (;;)
+ {
+ last -= size;
+
+ /* When LAST reaches FIRST, the failing element at FIRST
+ has already been uncounted above. */
+ if (first == last)
+ return count;
+ else if (predicate (last, aux))
+ break;
+ else
+ count--;
+ }
+
+ /* By swapping FIRST and LAST we extend the starting and
+ ending sequences that pass and fail, respectively,
+ PREDICATE. */
+ SWAP (first, last, size);
+ first += size;
+ }
+}
+\f
+/* An algo_random_func that draws from the generator returned by
+   pspp_rng() in random.h.  The raw value is reduced modulo MAX,
+   so MAX must be nonzero.  AUX is ignored. */
+unsigned
+algo_default_random (unsigned max, void *aux unused)
+{
+  unsigned raw = rng_get_unsigned (pspp_rng ());
+
+  return raw % max;
+}
+
+/* Randomly reorders ARRAY, which contains COUNT elements of SIZE
+   bytes each.  Uses RANDOM as a source of random data, passing
+   AUX as the auxiliary data.  RANDOM may be null to use a
+   default random source.
+
+   For each index I from 1 up to COUNT - 1, element I is swapped
+   with the element at index RANDOM (I + 1, AUX), which lies in
+   0...I and may be I itself. */
+void
+random_shuffle (void *array_, size_t count, size_t size,
+                algo_random_func *random, void *aux)
+{
+  /* Plain char matches the pointer type that SWAP works with. */
+  char *array = array_;
+  /* size_t, like COUNT, so the loop comparison is not mixed-sign
+     and cannot overflow an int on large arrays. */
+  size_t i;
+
+  if (random == NULL)
+    random = algo_default_random;
+
+  for (i = 1; i < count; i++)
+    SWAP (array + i * size, array + random (i + 1, aux) * size, size);
+}
+\f
+/* Copies the COUNT elements of SIZE bytes each from ARRAY to
+   RESULT, except that elements for which PREDICATE is false are
+   not copied.  Copied elements are packed contiguously in RESULT
+   in their original order, so RESULT must have room for every
+   element that passes.  Returns the number of elements copied.
+   AUX is passed to PREDICATE as auxiliary data. */
+size_t
+copy_if (const void *array, size_t count, size_t size,
+         void *result,
+         algo_predicate_func *predicate, void *aux)
+{
+  const unsigned char *input = array;
+  const unsigned char *const last = input + size * count;
+  unsigned char *output = result;
+  size_t copied = 0;
+
+  /* LAST points one past the final element, so the bound must be
+     exclusive; an inclusive bound would test (and possibly copy)
+     an element beyond the end of ARRAY. */
+  for (; input < last; input += size)
+    if (predicate (input, aux))
+      {
+        memcpy (output, input, size);
+        output += size;
+        copied++;
+      }
+
+  return copied;
+}
+
+/* A predicate and its auxiliary data, bundled so that both can be
+ passed through a single `void *aux' argument (see not()). */
+struct pred_aux
+ {
+ algo_predicate_func *predicate; /* Predicate to call. */
+ void *aux; /* Auxiliary data to pass to PREDICATE. */
+ };
+
+/* An algo_predicate_func that inverts another predicate.
+   PRED_AUX_ must point to a struct pred_aux naming the predicate
+   to invert and its auxiliary data.  Returns 1 if that predicate
+   returns zero for DATA, otherwise 0. */
+static int
+not (const void *data, void *pred_aux_)
+{
+  const struct pred_aux *wrapped = pred_aux_;
+  int passed = wrapped->predicate (data, wrapped->aux);
+
+  return passed == 0;
+}
+
+/* Copies the COUNT elements of SIZE bytes each from ARRAY to
+   RESULT, skipping every element for which PREDICATE is true.
+   Returns the number of elements copied.  AUX is passed to
+   PREDICATE as auxiliary data.
+
+   Implemented as copy_if() with the predicate inverted by
+   not(). */
+size_t
+remove_copy_if (const void *array, size_t count, size_t size,
+                void *result,
+                algo_predicate_func *predicate, void *aux)
+{
+  struct pred_aux inverted;
+
+  inverted.aux = aux;
+  inverted.predicate = predicate;
+
+  return copy_if (array, count, size, result, not, &inverted);
+}
+\f
+/* Copyright (C) 1991, 1992, 1996, 1997, 1999 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+ Written by Douglas C. Schmidt (schmidt@ics.uci.edu).
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, write to the Free
+ Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
+ 02111-1307 USA. */
+
+/* If you consider tuning this algorithm, you should consult first:
+ Engineering a sort function; Jon Bentley and M. Douglas McIlroy;
+ Software - Practice and Experience; Vol. 23 (11), 1249-1265, 1993. */
+
+#include <alloca.h>
+#include <limits.h>
+#include <stdlib.h>
+#include <string.h>
+
+/* Discontinue quicksort algorithm when partition gets below this size.
+ This particular magic number was chosen to work best on a Sun 4/260. */
+#define MAX_THRESH 4
+
+/* Stack node declarations used to store unfulfilled partition obligations. */
+typedef struct
+ {
+ char *lo; /* First element of the pending partition. */
+ char *hi; /* Last element of the pending partition. */
+ } stack_node;
+
+/* The next 4 #defines implement a very fast in-line stack abstraction. */
+/* The stack needs log (total_elements) entries (we could even subtract
+ log(MAX_THRESH)). Since total_elements has type size_t, we get as
+ upper bound for log (total_elements):
+ bits per byte (CHAR_BIT) * sizeof(size_t). */
+/* PUSH, POP and STACK_NOT_EMPTY expect an array named `stack' and a
+ pointer named `top' (the next free slot) to be in scope where
+ they are used. */
+#define STACK_SIZE (CHAR_BIT * sizeof(size_t))
+#define PUSH(low, high) ((void) ((top->lo = (low)), (top->hi = (high)), ++top))
+#define POP(low, high) ((void) (--top, (low = top->lo), (high = top->hi)))
+#define STACK_NOT_EMPTY (stack < top)
+
+
+/* Order size using quicksort. This implementation incorporates
+ four optimizations discussed in Sedgewick:
+
+ 1. Non-recursive, using an explicit stack of pointer that store the
+ next array partition to sort. To save time, this maximum amount
+ of space required to store an array of SIZE_MAX is allocated on the
+ stack. Assuming a 32-bit (64 bit) integer for size_t, this needs
+ only 32 * sizeof(stack_node) == 256 bytes (for 64 bit: 1024 bytes).
+ Pretty cheap, actually.
+
+ 2. Chose the pivot element using a median-of-three decision tree.
+ This reduces the probability of selecting a bad pivot value and
+ eliminates certain extraneous comparisons.
+
+ 3. Only quicksorts TOTAL_ELEMS / MAX_THRESH partitions, leaving
+ insertion sort to order the MAX_THRESH items within each partition.
+ This is a big win, since insertion sort is faster for small, mostly
+ sorted array segments.
+
+ 4. The larger of the two sub-partitions is always pushed onto the
+ stack first, with the algorithm then concentrating on the
+ smaller partition. This *guarantees* no more than log (total_elems)
+ stack size is needed (actually O(1) in this case)! */
+
+/* Sorts the TOTAL_ELEMS elements of SIZE bytes each that start at
+ PBASE into nondecreasing order as defined by CMP. CMP is called
+ as CMP (A, B, AUX) and only the sign of its result is examined
+ (always as `< 0'). Returns immediately if TOTAL_ELEMS is zero.
+
+ Works in two phases: a quicksort pass that leaves every
+ partition of at most MAX_THRESH elements unsorted, followed by
+ a single insertion sort over the whole array. */
+void
+sort (void *const pbase, size_t total_elems, size_t size,
+ algo_compare_func *cmp, void *aux)
+{
+ register char *base_ptr = (char *) pbase;
+
+ /* MAX_THRESH expressed in bytes rather than elements. */
+ const size_t max_thresh = MAX_THRESH * size;
+
+ if (total_elems == 0)
+ /* Avoid lossage with unsigned arithmetic below. */
+ return;
+
+ /* Arrays of MAX_THRESH or fewer elements skip the quicksort phase
+ and are handled entirely by the insertion sort below. */
+ if (total_elems > MAX_THRESH)
+ {
+ char *lo = base_ptr;
+ char *hi = &lo[size * (total_elems - 1)];
+ stack_node stack[STACK_SIZE];
+ /* TOP starts one slot above the base, so the stack counts as
+ non-empty before anything is pushed. The final POP
+ therefore loads the never-written stack[0] into LO and HI;
+ those values are not used, because the loop then ends with
+ TOP == STACK. */
+ stack_node *top = stack + 1;
+
+ while (STACK_NOT_EMPTY)
+ {
+ char *left_ptr;
+ char *right_ptr;
+
+ /* Select median value from among LO, MID, and HI. Rearrange
+ LO and HI so the three values are sorted. This lowers the
+ probability of picking a pathological pivot value and
+ skips a comparison for both the LEFT_PTR and RIGHT_PTR in
+ the while loops. */
+
+ char *mid = lo + size * ((hi - lo) / size >> 1);
+
+ if ((*cmp) ((void *) mid, (void *) lo, aux) < 0)
+ SWAP (mid, lo, size);
+ if ((*cmp) ((void *) hi, (void *) mid, aux) < 0)
+ SWAP (mid, hi, size);
+ else
+ goto jump_over;
+ if ((*cmp) ((void *) mid, (void *) lo, aux) < 0)
+ SWAP (mid, lo, size);
+ jump_over:;
+
+ left_ptr = lo + size;
+ right_ptr = hi - size;
+
+ /* Here's the famous ``collapse the walls'' section of quicksort.
+ Gotta like those tight inner loops! They are the main reason
+ that this algorithm runs much faster than others. */
+ do
+ {
+ while ((*cmp) ((void *) left_ptr, (void *) mid, aux) < 0)
+ left_ptr += size;
+
+ while ((*cmp) ((void *) mid, (void *) right_ptr, aux) < 0)
+ right_ptr -= size;
+
+ if (left_ptr < right_ptr)
+ {
+ SWAP (left_ptr, right_ptr, size);
+ /* The swap may have moved the pivot element; keep MID
+ pointing at it. */
+ if (mid == left_ptr)
+ mid = right_ptr;
+ else if (mid == right_ptr)
+ mid = left_ptr;
+ left_ptr += size;
+ right_ptr -= size;
+ }
+ else if (left_ptr == right_ptr)
+ {
+ left_ptr += size;
+ right_ptr -= size;
+ break;
+ }
+ }
+ while (left_ptr <= right_ptr);
+
+ /* Set up pointers for next iteration. First determine whether
+ left and right partitions are below the threshold size. If so,
+ ignore one or both. Otherwise, push the larger partition's
+ bounds on the stack and continue sorting the smaller one. */
+
+ if ((size_t) (right_ptr - lo) <= max_thresh)
+ {
+ if ((size_t) (hi - left_ptr) <= max_thresh)
+ /* Ignore both small partitions. */
+ POP (lo, hi);
+ else
+ /* Ignore small left partition. */
+ lo = left_ptr;
+ }
+ else if ((size_t) (hi - left_ptr) <= max_thresh)
+ /* Ignore small right partition. */
+ hi = right_ptr;
+ else if ((right_ptr - lo) > (hi - left_ptr))
+ {
+ /* Push larger left partition indices. */
+ PUSH (lo, right_ptr);
+ lo = left_ptr;
+ }
+ else
+ {
+ /* Push larger right partition indices. */
+ PUSH (left_ptr, hi);
+ hi = right_ptr;
+ }
+ }
+ }
+
+ /* Once the BASE_PTR array is partially sorted by quicksort the rest
+ is completely sorted using insertion sort, since this is efficient
+ for partitions below MAX_THRESH size. BASE_PTR points to the beginning
+ of the array to sort, and END_PTR points at the very last element in
+ the array (*not* one beyond it!). */
+
+#define min(x, y) ((x) < (y) ? (x) : (y))
+
+ {
+ char *const end_ptr = &base_ptr[size * (total_elems - 1)];
+ char *tmp_ptr = base_ptr;
+ char *thresh = min(end_ptr, base_ptr + max_thresh);
+ register char *run_ptr;
+
+ /* Find smallest element in first threshold and place it at the
+ array's beginning. This is the smallest array element,
+ and the operation speeds up insertion sort's inner loop. */
+
+ for (run_ptr = tmp_ptr + size; run_ptr <= thresh; run_ptr += size)
+ if ((*cmp) ((void *) run_ptr, (void *) tmp_ptr, aux) < 0)
+ tmp_ptr = run_ptr;
+
+ if (tmp_ptr != base_ptr)
+ SWAP (tmp_ptr, base_ptr, size);
+
+ /* Insertion sort, running from left-hand-side up to right-hand-side. */
+
+ run_ptr = base_ptr + size;
+ while ((run_ptr += size) <= end_ptr)
+ {
+ /* Scan left for the insertion point. This loop has no
+ lower-bound check of its own; it relies on the minimum
+ element placed at BASE_PTR above to stop it. */
+ tmp_ptr = run_ptr - size;
+ while ((*cmp) ((void *) run_ptr, (void *) tmp_ptr, aux) < 0)
+ tmp_ptr -= size;
+
+ tmp_ptr += size;
+ if (tmp_ptr != run_ptr)
+ {
+ char *trav;
+
+ /* Rotate the elements from TMP_PTR through RUN_PTR right
+ by one element, handling one byte position of the
+ element per pass of the outer loop. */
+ trav = run_ptr + size;
+ while (--trav >= run_ptr)
+ {
+ char c = *trav;
+ char *hi, *lo;
+
+ for (hi = lo = trav; (lo -= size) >= tmp_ptr; hi = lo)
+ *hi = *lo;
+ *hi = c;
+ }
+ }
+ }
+ }
+}
--- /dev/null
+#ifndef SORT_ALGO_H
+#define SORT_ALGO_H 1
+
+#include <stddef.h>
+
+/* Compares A and B, given auxiliary data AUX, and returns a
+ strcmp()-type result. */
+typedef int algo_compare_func (const void *a, const void *b, void *aux);
+
+/* Tests a predicate on DATA, given auxiliary data AUX, and
+ returns nonzero if true or zero if false. */
+typedef int algo_predicate_func (const void *data, void *aux);
+
+/* Returns a random number in the range 0 through MAX exclusive,
+ given auxiliary data AUX. */
+typedef unsigned algo_random_func (unsigned max, void *aux);
+
+/* A generally suitable random function. */
+algo_random_func algo_default_random;
+
+/* Sorts ARRAY, which contains COUNT elements of SIZE bytes each,
+ using COMPARE for comparisons. AUX is passed to each
+ comparison as auxiliary data. */
+void sort (void *array, size_t count, size_t size,
+ algo_compare_func *compare, void *aux);
+
+/* Makes the elements in ARRAY unique, by moving up the elements
+ that are kept over adjacent duplicates, and returns the new
+ number of elements in the array. Sorted arrays only.
+ Arguments same as for sort() above. */
+size_t unique (void *array, size_t count, size_t size,
+ algo_compare_func *compare, void *aux);
+
+/* Helper function that calls sort(), then unique(). */
+size_t sort_unique (void *array, size_t count, size_t size,
+ algo_compare_func *compare, void *aux);
+
+/* Reorders ARRAY, which contains COUNT elements of SIZE bytes
+ each, so that the elements for which PREDICATE returns nonzero
+ precede those for which PREDICATE returns zero. AUX is passed
+ as auxiliary data to PREDICATE. Returns the number of
+ elements for which PREDICATE returns nonzero. Not stable. */
+size_t partition (void *array, size_t count, size_t size,
+ algo_predicate_func *predicate, void *aux);
+
+/* Randomly reorders ARRAY, which contains COUNT elements of SIZE
+ bytes each. Uses RANDOM as a source of random data, passing
+ AUX as the auxiliary data. RANDOM may be null to use a
+ default random source. */
+void random_shuffle (void *array, size_t count, size_t size,
+ algo_random_func *random, void *aux);
+
+/* Copies the COUNT elements of SIZE bytes each from ARRAY to
+ RESULT, except that elements for which PREDICATE is false are
+ not copied. Returns the number of elements copied. AUX is
+ passed to PREDICATE as auxiliary data. RESULT must have room
+ for every element that is copied. */
+size_t copy_if (const void *array, size_t count, size_t size,
+ void *result,
+ algo_predicate_func *predicate, void *aux);
+
+/* Copies the COUNT elements of SIZE bytes each from ARRAY to
+ RESULT, except that elements for which PREDICATE is true are
+ not copied. Returns the number of elements copied. AUX is
+ passed to PREDICATE as auxiliary data. RESULT must have room
+ for every element that is copied. */
+size_t remove_copy_if (const void *array, size_t count, size_t size,
+ void *result,
+ algo_predicate_func *predicate, void *aux);
+
+#endif /* SORT_ALGO_H */
#include <config.h>
#include <stdlib.h>
-#include "avl.h"
#include "command.h"
#include "error.h"
#include "file-handle.h"
+#include "hash.h"
#include "lexer.h"
#include "sfm.h"
#include "str.h"
+#include "value-labels.h"
#include "var.h"
#include "debug-print.h"
s->label = NULL;
}
- if (s->val_lab && t->width > MAX_SHORT_STRING)
+ if (val_labs_count (s->val_labs) && t->width > MAX_SHORT_STRING)
msg (SW, _("Cannot add value labels from source file to "
"long string variable %s."),
s->name);
- else if (s->val_lab)
+ else if (val_labs_count (s->val_labs))
{
+ /* Whether to apply the value labels. */
+ int apply = 1;
+
if (t->width < s->width)
{
- avl_traverser iter;
- struct value_label *lab;
+ struct val_labs_iterator *i;
+ struct val_lab *lab;
- avl_traverser_init (iter);
- while ((lab = avl_traverse (s->val_lab, &iter)) != NULL)
+ for (lab = val_labs_first (s->val_labs, &i); lab != NULL;
+ lab = val_labs_next (s->val_labs, &i))
{
int j;
- /* If the truncated characters aren't all blanks
- anyway, then don't apply the value labels. */
+ /* We will apply the value labels only if all
+ the truncated characters are blanks. */
for (j = t->width; j < s->width; j++)
- if (lab->v.s[j] != ' ')
- goto skip_value_labels;
+ if (lab->value.s[j] != ' ')
+ {
+ val_labs_done (&i);
+ apply = 0;
+ break;
+ }
}
}
else
label values are right-padded with spaces, so it is
unnecessary to bother padding values here. */
}
-
- avl_destroy (t->val_lab, free_val_lab);
- t->val_lab = s->val_lab;
- s->val_lab = NULL;
+
+ if (apply)
+ {
+ val_labs_destroy (t->val_labs);
+ t->val_labs = s->val_labs;
+ val_labs_set_width (t->val_labs, t->width);
+ s->val_labs = val_labs_create (s->width);
+ }
}
- skip_value_labels: ;
if (s->miss_type != MISSING_NONE && t->width > MAX_SHORT_STRING)
msg (SW, _("Cannot apply missing values from source file to "
static int autorecode_trns_proc (struct trns_header *, struct ccase *);
static void autorecode_trns_free (struct trns_header *);
static int autorecode_proc_func (struct ccase *);
-static int compare_alpha_value (const void *, const void *, void *);
-static unsigned hash_alpha_value (const void *, void *);
-static int compare_numeric_value (const void *, const void *, void *);
-static unsigned hash_numeric_value (const void *, void *);
+static hsh_compare_func compare_alpha_value, compare_numeric_value;
+static hsh_hash_func hash_alpha_value, hash_numeric_value;
static void recode (void);
/* Performs the AUTORECODE procedure. */
for (i = 0; i < nv_dest; i++)
if (v_src[i]->type == ALPHA)
h_trans[i] = hsh_create (10, compare_alpha_value,
- hash_alpha_value, NULL,
- (void *) v_src[i]->width);
+ hash_alpha_value, NULL, v_src[i]);
else
h_trans[i] = hsh_create (10, compare_numeric_value,
hash_numeric_value, NULL, NULL);
if (v_src[i]->type == ALPHA)
spec->items = hsh_create (2 * count, compare_alpha_value,
- hash_alpha_value, NULL,
- (void *) v_src[i]->width);
+ hash_alpha_value, NULL, v_src[i]);
else
spec->items = hsh_create (2 * count, compare_numeric_value,
hash_numeric_value, NULL, NULL);
/* AUTORECODE procedure. */
static int
-compare_alpha_value (const void *a, const void *b, void *len)
+compare_alpha_value (const void *a_, const void *b_, void *v_)
{
- return memcmp (((union value *) a)->c, ((union value *) b)->c, (int) len);
+ const union value *a = a_;
+ const union value *b = b_;
+ const struct variable *v = v_;
+
+ return memcmp (a->c, b->c, v->width);
}
static unsigned
-hash_alpha_value (const void *a_, void *len)
+hash_alpha_value (const void *a_, void *v_)
{
const union value *a = a_;
- return hsh_hash_bytes (a->c, (int) len);
+ const struct variable *v = v_;
+
+ return hsh_hash_bytes (a->c, v->width);
}
static int
-compare_numeric_value (const void *pa, const void *pb, void *foobar unused)
+compare_numeric_value (const void *a_, const void *b_, void *foo unused)
{
- double a = ((union value *) pa)->f, b = ((union value *) pb)->f;
- return a > b ? 1 : (a < b ? -1 : 0);
+ const union value *a = a_;
+ const union value *b = b_;
+
+ return a->f < b->f ? -1 : a->f > b->f;
}
static unsigned
-hash_numeric_value (const void *a_, void *len unused)
+hash_numeric_value (const void *a_, void *foo unused)
{
const union value *a = a_;
- return hsh_hash_bytes (&a->f, sizeof a->f);
+
+ return hsh_hash_double (a->f);
}
static int
{
v.f = c->data[v_src[i]->fv].f;
vpp = (union value **) hsh_probe (h_trans[i], &v);
- if (NULL == *vpp)
+ if (*vpp == NULL)
{
vp = pool_alloc (hash_pool, sizeof (union value));
vp->f = v.f;
{
v.c = c->data[v_src[i]->fv].s;
vpp = (union value **) hsh_probe (h_trans[i], &v);
- if (NULL == *vpp)
+ if (*vpp == NULL)
{
vp = pool_alloc (hash_pool, sizeof (union value));
vp->c = pool_strndup (hash_pool, v.c, v_src[i]->width);
+++ /dev/null
-/* libavl - manipulates AVL trees.
- Copyright (C) 1998-9, 2000 Free Software Foundation, Inc.
-
- This program is free software; you can redistribute it and/or
- modify it under the terms of the GNU General Public License as
- published by the Free Software Foundation; either version 2 of the
- License, or (at your option) any later version.
-
- This program is distributed in the hope that it will be useful, but
- WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- General Public License for more details.
-
- You should have received a copy of the GNU General Public License
- along with this program; if not, write to the Free Software
- Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA
- 02111-1307, USA.
-
- The author may be contacted at <pfaffben@pilot.msu.edu> on the
- Internet, or as Ben Pfaff, 12167 Airport Rd, DeWitt MI 48820, USA
- through more mundane means. */
-
-/* This is file avl.c in libavl. */
-
-#if HAVE_CONFIG_H
-#include <config.h>
-#endif
-#if PSPP
-#include "pool.h"
-#define HAVE_XMALLOC 1
-#endif
-#if SELF_TEST
-#include <limits.h>
-#include <time.h>
-#endif
-#include <stdio.h>
-#include <stdlib.h>
-#include <assert.h>
-#include "avl.h"
-
-#if !PSPP && !__GCC__
-#define inline
-#endif
-
-#if !PSPP
-#if __GNUC__ >= 2
-#define unused __attribute__ ((unused))
-#else
-#define unused
-#endif
-#endif
-
-#ifdef HAVE_XMALLOC
-void *xmalloc (size_t);
-#else /* !HAVE_XMALLOC */
-/* Allocates SIZE bytes of space using malloc(). Aborts if out of
- memory. */
-static void *
-xmalloc (size_t size)
-{
- void *vp;
-
- if (size == 0)
- return NULL;
- vp = malloc (size);
-
- assert (vp != NULL);
- if (vp == NULL)
- {
- fprintf (stderr, "virtual memory exhausted\n");
- exit (EXIT_FAILURE);
- }
- return vp;
-}
-#endif /* !HAVE_XMALLOC */
-
-/* Creates an AVL tree in POOL (which can be NULL). POOL is owned by
- the caller, not by the AVL tree. CMP is a order function for the
- data to be stored in the tree. PARAM is arbitrary data that
- becomes an argument to the comparison function. */
-avl_tree *
-avl_create (MAYBE_POOL avl_comparison_func cmp, void *param)
-{
- avl_tree *tree;
-
- assert (cmp != NULL);
-#if PSPP
- if (pool)
- tree = pool_alloc (pool, sizeof *tree);
- else
-#endif
- tree = xmalloc (sizeof *tree);
-
-#if PSPP
- tree->pool = pool;
-#endif
- tree->root.link[0] = NULL;
- tree->root.link[1] = NULL;
- tree->cmp = cmp;
- tree->count = 0;
- tree->param = param;
-
- return tree;
-}
-
-/* Destroy tree TREE. Function FREE_FUNC is called for every node in
- the tree as it is destroyed.
-
- No effect if the tree has an pool owner and free_func is NULL.
- The caller owns the pool and must destroy it itself.
-
- Do not attempt to reuse the tree after it has been freed. Create a
- new one. */
-void
-avl_destroy (avl_tree *tree, avl_node_func free_func)
-{
- assert (tree != NULL);
-
-#if PSPP
- if (free_func || tree->pool == NULL)
-#endif
- {
- /* Uses Knuth's Algorithm 2.3.1T as modified in exercise 13
- (postorder traversal). */
-
- /* T1. */
- avl_node *an[AVL_MAX_HEIGHT]; /* Stack A: nodes. */
- char ab[AVL_MAX_HEIGHT]; /* Stack A: bits. */
- int ap = 0; /* Stack A: height. */
- avl_node *p = tree->root.link[0];
-
- for (;;)
- {
- /* T2. */
- while (p != NULL)
- {
- /* T3. */
- ab[ap] = 0;
- an[ap++] = p;
- p = p->link[0];
- }
-
- /* T4. */
- for (;;)
- {
- if (ap == 0)
- goto done;
-
- p = an[--ap];
- if (ab[ap] == 0)
- {
- ab[ap++] = 1;
- p = p->link[1];
- break;
- }
-
- if (free_func)
- free_func (p->data, tree->param);
-#if PSPP
- if (tree->pool == NULL)
-#endif
- free (p);
- }
- }
- }
-
- done:
-#if PSPP
- if (tree->pool == NULL)
-#endif
- free (tree);
-}
-
-/* avl_destroy() with FREE_FUNC hardcoded as free(). */
-void
-avl_free (avl_tree *tree)
-{
- avl_destroy (tree, (avl_node_func) free);
-}
-
-/* Return the number of nodes in TREE. */
-int
-avl_count (const avl_tree *tree)
-{
- assert (tree != NULL);
- return tree->count;
-}
-
-/* Allocates room for a new avl_node in POOL, or using xmalloc() if
- POOL is NULL. */
-#if PSPP
-static inline avl_node *
-new_node (struct pool *pool)
-{
- if (pool != NULL)
- return pool_alloc (pool, sizeof (avl_node));
- else
- return xmalloc (sizeof (avl_node));
-}
-#else
-static inline avl_node *
-new_node (void)
-{
- return xmalloc (sizeof (avl_node));
-}
-
-#define new_node(POOL) \
- new_node ()
-#endif
-
-/* Copy the contents of TREE to a new tree in POOL. If COPY is
- non-NULL, then each data item is passed to function COPY, and the
- return values are inserted into the new tree; otherwise, the items
- are copied verbatim from the old tree to the new tree. Returns the
- new tree. */
-avl_tree *
-avl_copy (MAYBE_POOL const avl_tree *tree, avl_copy_func copy)
-{
- /* This is a combination of Knuth's Algorithm 2.3.1C (copying a
- binary tree) and Algorithm 2.3.1T as modified by exercise 12
- (preorder traversal). */
-
- avl_tree *new_tree;
-
- /* PT1. */
- const avl_node *pa[AVL_MAX_HEIGHT]; /* Stack PA: nodes. */
- const avl_node **pp = pa; /* Stack PA: stack pointer. */
- const avl_node *p = &tree->root;
-
- /* QT1. */
- avl_node *qa[AVL_MAX_HEIGHT]; /* Stack QA: nodes. */
- avl_node **qp = qa; /* Stack QA: stack pointer. */
- avl_node *q;
-
- assert (tree != NULL);
-#if PSPP
- new_tree = avl_create (pool, tree->cmp, tree->param);
-#else
- new_tree = avl_create (tree->cmp, tree->param);
-#endif
- new_tree->count = tree->count;
- q = &new_tree->root;
-
- for (;;)
- {
- /* C4. */
- if (p->link[0] != NULL)
- {
- avl_node *r = new_node (pool);
- r->link[0] = r->link[1] = NULL;
- q->link[0] = r;
- }
-
- /* C5: Find preorder successors of P and Q. */
- goto start;
- for (;;)
- {
- /* PT2. */
- while (p != NULL)
- {
- goto escape;
- start:
- /* PT3. */
- *pp++ = p;
- *qp++ = q;
- p = p->link[0];
- q = q->link[0];
- }
-
- /* PT4. */
- if (pp == pa)
- {
- assert (qp == qa);
- return new_tree;
- }
-
- p = *--pp;
- q = *--qp;
-
- /* PT5. */
- p = p->link[1];
- q = q->link[1];
- }
- escape:
-
- /* C2. */
- if (p->link[1])
- {
- avl_node *r = new_node (pool);
- r->link[0] = r->link[1] = NULL;
- q->link[1] = r;
- }
-
- /* C3. */
- q->bal = p->bal;
- if (copy == NULL)
- q->data = p->data;
- else
- q->data = copy (p->data, tree->param);
- }
-}
-
-/* Walk tree TREE in inorder, calling WALK_FUNC at each node. Passes
- PARAM to WALK_FUNC. */
-void
-avl_walk (const avl_tree *tree, avl_node_func walk_func, void *param)
-{
- /* Uses Knuth's algorithm 2.3.1T (inorder traversal). */
- assert (tree && walk_func);
-
- {
- /* T1. */
- const avl_node *an[AVL_MAX_HEIGHT]; /* Stack A: nodes. */
- const avl_node **ap = an; /* Stack A: stack pointer. */
- const avl_node *p = tree->root.link[0];
-
- for (;;)
- {
- /* T2. */
- while (p != NULL)
- {
- /* T3. */
- *ap++ = p;
- p = p->link[0];
- }
-
- /* T4. */
- if (ap == an)
- return;
- p = *--ap;
-
- /* T5. */
- walk_func (p->data, param);
- p = p->link[1];
- }
- }
-}
-
-/* Each call to this function for a given TREE and TRAV return the
- next item in the tree in inorder. Initialize the first element of
- TRAV (init) to 0 before calling the first time. Returns NULL when
- out of elements. */
-void *
-avl_traverse (const avl_tree *tree, avl_traverser *trav)
-{
- assert (tree && trav);
-
- /* Uses Knuth's algorithm 2.3.1T (inorder traversal). */
- if (trav->init == 0)
- {
- /* T1. */
- trav->init = 1;
- trav->nstack = 0;
- trav->p = tree->root.link[0];
- }
- else
- /* T5. */
- trav->p = trav->p->link[1];
-
- for (;;)
- {
- /* T2. */
- while (trav->p != NULL)
- {
- /* T3. */
- trav->stack[trav->nstack++] = trav->p;
- trav->p = trav->p->link[0];
- }
-
- /* T4. */
- if (trav->nstack == 0)
- {
- trav->init = 0;
- return NULL;
- }
- trav->p = trav->stack[--trav->nstack];
-
- /* T5. */
- return trav->p->data;
- }
-}
-
-/* Search TREE for an item matching ITEM. If found, returns a pointer
- to the address of the item. If none is found, ITEM is inserted
- into the tree, and a pointer to the address of ITEM is returned.
- In either case, the pointer returned can be changed by the caller,
- or the returned data item can be directly edited, but the key data
- in the item must not be changed. */
-void **
-avl_probe (avl_tree *tree, void *item)
-{
- /* Uses Knuth's Algorithm 6.2.3A (balanced tree search and
- insertion), but caches results of comparisons. In empirical
- tests this eliminates about 25% of the comparisons seen under
- random insertions. */
-
- /* A1. */
- avl_node *t;
- avl_node *s, *p, *q, *r;
-
- assert (tree != NULL);
- t = &tree->root;
- s = p = t->link[0];
-
- if (s == NULL)
- {
- tree->count++;
- assert (tree->count == 1);
- q = t->link[0] = new_node (tree->pool);
- q->data = item;
- q->link[0] = q->link[1] = NULL;
- q->bal = 0;
- return &q->data;
- }
-
- for (;;)
- {
- /* A2. */
- int diff = tree->cmp (item, p->data, tree->param);
-
- /* A3. */
- if (diff < 0)
- {
- p->cache = 0;
- q = p->link[0];
- if (q == NULL)
- {
- p->link[0] = q = new_node (tree->pool);
- break;
- }
- }
- /* A4. */
- else if (diff > 0)
- {
- p->cache = 1;
- q = p->link[1];
- if (q == NULL)
- {
- p->link[1] = q = new_node (tree->pool);
- break;
- }
- }
- else
- /* A2. */
- return &p->data;
-
- /* A3, A4. */
- if (q->bal != 0)
- t = p, s = q;
- p = q;
- }
-
- /* A5. */
- tree->count++;
- q->data = item;
- q->link[0] = q->link[1] = NULL;
- q->bal = 0;
-
- /* A6. */
- r = p = s->link[(int) s->cache];
- while (p != q)
- {
- p->bal = p->cache * 2 - 1;
- p = p->link[(int) p->cache];
- }
-
- /* A7. */
- if (s->cache == 0)
- {
- /* a = -1. */
- if (s->bal == 0)
- {
- s->bal = -1;
- return &q->data;
- }
- else if (s->bal == +1)
- {
- s->bal = 0;
- return &q->data;
- }
-
- assert (s->bal == -1);
- if (r->bal == -1)
- {
- /* A8. */
- p = r;
- s->link[0] = r->link[1];
- r->link[1] = s;
- s->bal = r->bal = 0;
- }
- else
- {
- /* A9. */
- assert (r->bal == +1);
- p = r->link[1];
- r->link[1] = p->link[0];
- p->link[0] = r;
- s->link[0] = p->link[1];
- p->link[1] = s;
- if (p->bal == -1)
- s->bal = 1, r->bal = 0;
- else if (p->bal == 0)
- s->bal = r->bal = 0;
- else
- {
- assert (p->bal == +1);
- s->bal = 0, r->bal = -1;
- }
- p->bal = 0;
- }
- }
- else
- {
- /* a == +1. */
- if (s->bal == 0)
- {
- s->bal = 1;
- return &q->data;
- }
- else if (s->bal == -1)
- {
- s->bal = 0;
- return &q->data;
- }
-
- assert (s->bal == +1);
- if (r->bal == +1)
- {
- /* A8. */
- p = r;
- s->link[1] = r->link[0];
- r->link[0] = s;
- s->bal = r->bal = 0;
- }
- else
- {
- /* A9. */
- assert (r->bal == -1);
- p = r->link[0];
- r->link[0] = p->link[1];
- p->link[1] = r;
- s->link[1] = p->link[0];
- p->link[0] = s;
- if (p->bal == +1)
- s->bal = -1, r->bal = 0;
- else if (p->bal == 0)
- s->bal = r->bal = 0;
- else
- {
- assert (p->bal == -1);
- s->bal = 0, r->bal = 1;
- }
- p->bal = 0;
- }
- }
-
- /* A10. */
- if (t != &tree->root && s == t->link[1])
- t->link[1] = p;
- else
- t->link[0] = p;
-
- return &q->data;
-}
-
-/* Search TREE for an item matching ITEM, and return it if found. */
-void *
-avl_find (const avl_tree *tree, const void *item)
-{
- const avl_node *p;
-
- assert (tree != NULL);
- for (p = tree->root.link[0]; p; )
- {
- int diff = tree->cmp (item, p->data, tree->param);
-
- if (diff < 0)
- p = p->link[0];
- else if (diff > 0)
- p = p->link[1];
- else
- return p->data;
- }
-
- return NULL;
-}
-
-/* Searches AVL tree TREE for an item matching ITEM. If found, the
- item is removed from the tree and the actual item found is returned
- to the caller. If no item matching ITEM exists in the tree,
- returns NULL. */
-void *
-avl_delete (avl_tree *tree, const void *item)
-{
- /* Uses my Algorithm D, which can be found at
- http://www.msu.edu/user/pfaffben/avl. Algorithm D is based on
- Knuth's Algorithm 6.2.2D (Tree deletion) and 6.2.3A (Balanced
- tree search and insertion), as well as the notes on pages 465-466
- of Vol. 3. */
-
- /* D1. */
- avl_node *pa[AVL_MAX_HEIGHT]; /* Stack P: Nodes. */
- char a[AVL_MAX_HEIGHT]; /* Stack P: Bits. */
- int k = 1; /* Stack P: Pointer. */
-
- avl_node **q;
- avl_node *p;
-
- assert (tree != NULL);
-
- a[0] = 0;
- pa[0] = &tree->root;
- p = tree->root.link[0];
- for (;;)
- {
- /* D2. */
- int diff;
-
- if (p == NULL)
- return NULL;
-
- diff = tree->cmp (item, p->data, tree->param);
- if (diff == 0)
- break;
-
- /* D3, D4. */
- pa[k] = p;
- if (diff < 0)
- {
- p = p->link[0];
- a[k] = 0;
- }
- else if (diff > 0)
- {
- p = p->link[1];
- a[k] = 1;
- }
- k++;
- }
- tree->count--;
-
- item = p->data;
-
- /* D5. */
- q = &pa[k - 1]->link[(int) a[k - 1]];
- if (p->link[1] == NULL)
- {
- *q = p->link[0];
- if (*q)
- (*q)->bal = 0;
- }
- else
- {
- /* D6. */
- avl_node *r = p->link[1];
- if (r->link[0] == NULL)
- {
- r->link[0] = p->link[0];
- *q = r;
- r->bal = p->bal;
- a[k] = 1;
- pa[k++] = r;
- }
- else
- {
- /* D7. */
- avl_node *s = r->link[0];
- int l = k++;
-
- a[k] = 0;
- pa[k++] = r;
-
- /* D8. */
- while (s->link[0] != NULL)
- {
- r = s;
- s = r->link[0];
- a[k] = 0;
- pa[k++] = r;
- }
-
- /* D9. */
- a[l] = 1;
- pa[l] = s;
- s->link[0] = p->link[0];
- r->link[0] = s->link[1];
- s->link[1] = p->link[1];
- s->bal = p->bal;
- *q = s;
- }
- }
-
-#if PSPP
- if (tree->pool == NULL)
-#endif
- free (p);
-
- assert (k > 0);
- /* D10. */
- while (--k)
- {
- avl_node *s = pa[k], *r;
-
- if (a[k] == 0)
- {
- /* D10. */
- if (s->bal == -1)
- {
- s->bal = 0;
- continue;
- }
- else if (s->bal == 0)
- {
- s->bal = 1;
- break;
- }
-
- assert (s->bal == +1);
- r = s->link[1];
-
- assert (r != NULL);
- if (r->bal == 0)
- {
- /* D11. */
- s->link[1] = r->link[0];
- r->link[0] = s;
- r->bal = -1;
- pa[k - 1]->link[(int) a[k - 1]] = r;
- break;
- }
- else if (r->bal == +1)
- {
- /* D12. */
- s->link[1] = r->link[0];
- r->link[0] = s;
- s->bal = r->bal = 0;
- pa[k - 1]->link[(int) a[k - 1]] = r;
- }
- else
- {
- /* D13. */
- assert (r->bal == -1);
- p = r->link[0];
- r->link[0] = p->link[1];
- p->link[1] = r;
- s->link[1] = p->link[0];
- p->link[0] = s;
- if (p->bal == +1)
- s->bal = -1, r->bal = 0;
- else if (p->bal == 0)
- s->bal = r->bal = 0;
- else
- {
- assert (p->bal == -1);
- s->bal = 0, r->bal = +1;
- }
- p->bal = 0;
- pa[k - 1]->link[(int) a[k - 1]] = p;
- }
- }
- else
- {
- assert (a[k] == 1);
-
- /* D10. */
- if (s->bal == +1)
- {
- s->bal = 0;
- continue;
- }
- else if (s->bal == 0)
- {
- s->bal = -1;
- break;
- }
-
- assert (s->bal == -1);
- r = s->link[0];
-
- if (r == NULL || r->bal == 0)
- {
- /* D11. */
- s->link[0] = r->link[1];
- r->link[1] = s;
- r->bal = 1;
- pa[k - 1]->link[(int) a[k - 1]] = r;
- break;
- }
- else if (r->bal == -1)
- {
- /* D12. */
- s->link[0] = r->link[1];
- r->link[1] = s;
- s->bal = r->bal = 0;
- pa[k - 1]->link[(int) a[k - 1]] = r;
- }
- else if (r->bal == +1)
- {
- /* D13. */
- p = r->link[1];
- r->link[1] = p->link[0];
- p->link[0] = r;
- s->link[0] = p->link[1];
- p->link[1] = s;
- if (p->bal == -1)
- s->bal = 1, r->bal = 0;
- else if (p->bal == 0)
- s->bal = r->bal = 0;
- else
- {
- assert (p->bal == 1);
- s->bal = 0, r->bal = -1;
- }
- p->bal = 0;
- pa[k - 1]->link[(int) a[k - 1]] = p;
- }
- }
- }
-
- return (void *) item;
-}
-
-/* Inserts ITEM into TREE. Returns NULL if the item was inserted,
- otherwise a pointer to the duplicate item. */
-void *
-avl_insert (avl_tree *tree, void *item)
-{
- void **p;
-
- assert (tree != NULL);
-
- p = avl_probe (tree, item);
- return (*p == item) ? NULL : *p;
-}
-
-/* If ITEM does not exist in TREE, inserts it and returns NULL. If a
- matching item does exist, it is replaced by ITEM and the item
- replaced is returned. The caller is responsible for freeing the
- item returned. */
-void *
-avl_replace (avl_tree *tree, void *item)
-{
- void **p;
-
- assert (tree != NULL);
-
- p = avl_probe (tree, item);
- if (*p == item)
- return NULL;
- else
- {
- void *r = *p;
- *p = item;
- return r;
- }
-}
-
-/* Delete ITEM from TREE when you know that ITEM must be in TREE. For
- debugging purposes. */
-void *
-(avl_force_delete) (avl_tree *tree, void *item)
-{
- void *found = avl_delete (tree, item);
- assert (found != NULL);
- return found;
-}
-\f
-#if SELF_TEST
-
-/* Used to flag delayed aborting. */
-int done = 0;
-
-/* Print the structure of node NODE of an avl tree, which is LEVEL
- levels from the top of the tree. Uses different delimiters to
- visually distinguish levels. */
-void
-print_structure (avl_node *node, int level)
-{
- char lc[] = "([{`/";
- char rc[] = ")]}'\\";
-
- assert (level <= 10);
-
- if (node == NULL)
- {
- printf (" nil");
- return;
- }
- printf (" %c%d", lc[level % 5], (int) node->data);
- if (node->link[0] || node->link[1])
- print_structure (node->link[0], level + 1);
- if (node->link[1])
- print_structure (node->link[1], level + 1);
- printf ("%c", rc[level % 5]);
-}
-
-/* Compare two integers A and B and return a strcmp()-type result. */
-int
-compare_ints (const void *a, const void *b, void *param unused)
-{
- return ((int) a) - ((int) b);
-}
-
-/* Print the value of integer A. */
-void
-print_int (void *a, void *param unused)
-{
- printf (" %d", (int) a);
-}
-
-/* Linearly print contents of TREE. */
-void
-print_contents (avl_tree *tree)
-{
- avl_walk (tree, print_int, NULL);
- printf ("\n");
-}
-
-/* Examine NODE in a avl tree. *COUNT is increased by the number of
- nodes in the tree, including the current one. If the node is the
- root of the tree, PARENT should be INT_MIN, otherwise it should be
- the parent node value. DIR is the direction that the current node
- is linked from the parent: -1 for left child, +1 for right child;
- it is not used if PARENT is INT_MIN. Returns the height of the
- tree rooted at NODE. */
-int
-recurse_tree (avl_node *node, int *count, int parent, int dir)
-{
- if (node)
- {
- int d = (int) node->data;
- int nl = node->link[0] ? recurse_tree (node->link[0], count, d, -1) : 0;
- int nr = node->link[1] ? recurse_tree (node->link[1], count, d, 1) : 0;
- (*count)++;
-
- if (nr - nl != node->bal)
- {
- printf (" Node %d is unbalanced: right height=%d, left height=%d, "
- "difference=%d, but balance factor=%d.\n",
- d, nr, nl, nr - nl, node->bal);
- done = 1;
- }
-
- if (parent != INT_MIN)
- {
- assert (dir == -1 || dir == +1);
- if (dir == -1 && d > parent)
- {
- printf (" Node %d is smaller than its left child %d.\n",
- parent, d);
- done = 1;
- }
- else if (dir == +1 && d < parent)
- {
- printf (" Node %d is larger than its right child %d.\n",
- parent, d);
- done = 1;
- }
- }
- assert (node->bal >= -1 && node->bal <= 1);
- return 1 + (nl > nr ? nl : nr);
- }
- else return 0;
-}
-
-/* Check that everything about TREE is kosher. */
-void
-verify_tree (avl_tree *tree)
-{
- int count = 0;
- recurse_tree (tree->root.link[0], &count, INT_MIN, 0);
- if (count != tree->count)
- {
- printf (" Tree has %d nodes, but tree count is %d.\n",
- count, tree->count);
- done = 1;
- }
- if (done)
- abort ();
-}
-
-/* Arrange the N elements of ARRAY in random order. */
-void
-shuffle (int *array, int n)
-{
- int i;
-
- for (i = 0; i < n; i++)
- {
- int j = i + rand () % (n - i);
- int t = array[j];
- array[j] = array[i];
- array[i] = t;
- }
-}
-
-/* Compares avl trees rooted at A and B, making sure that they are
- identical. */
-void
-compare_trees (avl_node *a, avl_node *b)
-{
- if (a == NULL || b == NULL)
- {
- assert (a == NULL && b == NULL);
- return;
- }
- if (a->data != b->data || a->bal != b->bal
- || ((a->link[0] != NULL) ^ (b->link[0] != NULL))
- || ((a->link[1] != NULL) ^ (b->link[1] != NULL)))
- {
- printf (" Copied nodes differ: %d b=%d a->bal=%d b->bal=%d a:",
- (int) a->data, (int) b->data, a->bal, b->bal);
- if (a->link[0])
- printf ("l");
- if (a->link[1])
- printf ("r");
- printf (" b:");
- if (b->link[0])
- printf ("l");
- if (b->link[1])
- printf ("r");
- printf ("\n");
- abort ();
- }
- if (a->link[0] != NULL)
- compare_trees (a->link[0], b->link[0]);
- if (a->link[1] != NULL)
- compare_trees (a->link[1], b->link[1]);
-}
-
-/* Simple stress test procedure for the AVL tree routines. Does the
- following:
-
- * Generate a random number seed. By default this is generated from
- the current time. You can also pass a seed value on the command
- line if you want to test the same case. The seed value is
- displayed.
-
- * Create a tree and insert the integers from 0 up to TREE_SIZE - 1
- into it, in random order. Verify the tree structure after each
- insertion.
-
- * Remove each integer from the tree, in a different random order.
- After each deletion, verify the tree structure; also, make a copy
- of the tree into a new tree, verify the copy and compare it to the
- original, then destroy the copy.
-
- * Destroy the tree, increment the random seed value, and start over.
-
- If you make any modifications to the avl tree routines, then you
- might want to insert some calls to print_structure() at strategic
- places in order to be able to see what's really going on. Also,
- memory debuggers like Checker or Purify are very handy. */
-#define TREE_SIZE 1024
-#define N_ITERATIONS 16
-int
-main (int argc, char **argv)
-{
- int array[TREE_SIZE];
- int seed;
- int iteration;
-
- if (argc == 2)
- seed = atoi (argv[1]);
- else
- seed = time (0) * 257 % 32768;
-
- fputs ("Testing avl...\n", stdout);
-
- for (iteration = 1; iteration <= N_ITERATIONS; iteration++)
- {
- avl_tree *tree;
- int i;
-
- printf ("Iteration %4d/%4d: seed=%5d", iteration, N_ITERATIONS, seed);
- fflush (stdout);
-
- srand (seed++);
-
- for (i = 0; i < TREE_SIZE; i++)
- array[i] = i;
- shuffle (array, TREE_SIZE);
-
- tree = avl_create (compare_ints, NULL);
- for (i = 0; i < TREE_SIZE; i++)
- avl_force_insert (tree, (void *) (array[i]));
- verify_tree (tree);
-
- shuffle (array, TREE_SIZE);
- for (i = 0; i < TREE_SIZE; i++)
- {
- avl_tree *copy;
-
- avl_delete (tree, (void *) (array[i]));
- verify_tree (tree);
-
- copy = avl_copy (tree, NULL);
- verify_tree (copy);
- compare_trees (tree->root.link[0], copy->root.link[0]);
- avl_destroy (copy, NULL);
-
- if (i % 128 == 0)
- {
- putchar ('.');
- fflush (stdout);
- }
- }
- fputs (" good.\n", stdout);
-
- avl_destroy (tree, NULL);
- }
-
- return 0;
-}
-#endif /* SELF_TEST */
-
-/*
- Local variables:
- compile-command: "gcc -DSELF_TEST=1 -W -Wall -I. -o ./avl-test avl.c"
- End:
-*/
-
+++ /dev/null
-/* libavl - manipulates AVL trees.
- Copyright (C) 1998-9, 2000 Free Software Foundation, Inc.
- Written by Ben Pfaff <blp@gnu.org>.
-
- This program is free software; you can redistribute it and/or
- modify it under the terms of the GNU General Public License as
- published by the Free Software Foundation; either version 2 of the
- License, or (at your option) any later version.
-
- This program is distributed in the hope that it will be useful, but
- WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- General Public License for more details.
-
- You should have received a copy of the GNU General Public License
- along with this program; if not, write to the Free Software
- Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA
- 02111-1307, USA. */
-
-/* This is file avl.h in libavl, version 1.1.0. */
-
-#if !avl_h
-#define avl_h 1
-
-/* This stack size allows for AVL trees for between 5,704,880 and
- 4,294,967,295 nodes, depending on order of insertion. If you
- increase this it will require recoding some functions that assume
- one long is big enough for a bitmap. */
-#ifndef AVL_MAX_HEIGHT
-#define AVL_MAX_HEIGHT 32
-#endif
-
-/* Structure for a node in an AVL tree. */
-typedef struct avl_node
- {
- void *data; /* Pointer to data. */
- struct avl_node *link[2]; /* Subtrees. */
- signed char bal; /* Balance factor. */
- char cache; /* Used during insertion. */
- signed char pad[2]; /* Unused. Reserved for threaded trees. */
- }
-avl_node;
-
-/* Used for traversing an AVL tree. */
-typedef struct avl_traverser
- {
- int init; /* Initialized? */
- int nstack; /* Top of stack. */
- const avl_node *p; /* Used for traversal. */
- const avl_node *stack[AVL_MAX_HEIGHT];/* Descended trees. */
- }
-avl_traverser;
-
-#define avl_traverser_init(TRAVERSER) (TRAVERSER).init = 0
-
-/* Function types. */
-#if !AVL_FUNC_TYPES
-#define AVL_FUNC_TYPES 1
-typedef int (*avl_comparison_func) (const void *a, const void *b, void *param);
-typedef void (*avl_node_func) (void *data, void *param);
-typedef void *(*avl_copy_func) (void *data, void *param);
-#endif
-
-/* Structure which holds information about an AVL tree. */
-typedef struct avl_tree
- {
-#if PSPP
- struct pool *pool; /* Pool to store nodes. */
-#endif
- avl_node root; /* Tree root node. */
- avl_comparison_func cmp; /* Used to compare keys. */
- int count; /* Number of nodes in the tree. */
- void *param; /* Arbitary user data. */
- }
-avl_tree;
-
-#if PSPP
-#define MAYBE_POOL struct pool *pool,
-#else
-#define MAYBE_POOL /* nothing */
-#endif
-
-/* General functions. */
-avl_tree *avl_create (MAYBE_POOL avl_comparison_func, void *param);
-void avl_destroy (avl_tree *, avl_node_func);
-void avl_free (avl_tree *);
-int avl_count (const avl_tree *);
-avl_tree *avl_copy (MAYBE_POOL const avl_tree *, avl_copy_func);
-
-/* Walk the tree. */
-void avl_walk (const avl_tree *, avl_node_func, void *param);
-void *avl_traverse (const avl_tree *, avl_traverser *);
-
-/* Search for a given item. */
-void **avl_probe (avl_tree *, void *);
-void *avl_delete (avl_tree *, const void *);
-void *avl_find (const avl_tree *, const void *);
-
-#if __GCC__ >= 2
-extern inline void *
-avl_insert (avl_tree *tree, void *item)
-{
- void **p = avl_probe (tree, item);
- return (*p == item) ? NULL : *p;
-}
-
-extern inline void *
-avl_replace (avl_tree *tree, void *item)
-{
- void **p = avl_probe (tree, item);
- if (*p == item)
- return NULL;
- else
- {
- void *r = *p;
- *p = item;
- return r;
- }
-}
-#else /* not gcc */
-void *avl_insert (avl_tree *tree, void *item);
-void *avl_replace (avl_tree *tree, void *item);
-#endif /* not gcc */
-
-/* Easy assertions on insertion & deletion. */
-#ifndef NDEBUG
-#define avl_force_insert(A, B) \
- do \
- { \
- void *r = avl_insert (A, B); \
- assert (r == NULL); \
- } \
- while (0)
-void *avl_force_delete (avl_tree *, void *);
-#else
-#define avl_force_insert(A, B) \
- avl_insert (A, B)
-#define avl_force_delete(A, B) \
- avl_delete (A, B)
-#endif
-
-#endif /* avl_h */
#include <ctype.h>
#include <stdlib.h>
#include <stdio.h>
+#include "algorithm.h"
#include "alloc.h"
-#include "avl.h"
#include "hash.h"
#include "pool.h"
#include "dcdflib/cdflib.h"
#include "stats.h"
#include "output.h"
#include "tab.h"
+#include "value-labels.h"
#include "var.h"
#include "vfm.h"
{
int i;
- if (var_dict->var_by_name)
+ if (var_dict->name_tab)
{
- avl_destroy (var_dict->var_by_name, NULL);
- var_dict->var_by_name = NULL;
+ hsh_destroy (var_dict->name_tab);
+ var_dict->name_tab = NULL;
}
for (i = 0; i < var_dict->nvar; i++)
var->type = v[i]->type;
var->foo = v[i]->index;
var_dict->var[i] = var;
- avl_force_insert (var_dict->var_by_name, var);
+ hsh_force_insert (var_dict->name_tab, var);
}
free (v);
/* Add record to hash table. */
{
- struct table_entry **tepp = (struct table_entry **) hsh_probe (gen_tab, te);
- if (NULL == *tepp)
+ struct table_entry **tepp
+ = (struct table_entry **) hsh_probe (gen_tab, te);
+ if (*tepp == NULL)
{
struct table_entry *tep = pool_alloc (pl_tc, entry_size);
}
#endif
-/* Compare the table_entry's at PA and PB and return a strcmp()-type
+/* Compare the table_entry's at A and B and return a strcmp()-type
result. */
static int
-compare_table_entry (const void *pa, const void *pb, void *foo unused)
+compare_table_entry (const void *a_, const void *b_, void *foo unused)
{
- const struct table_entry *a = pa;
- const struct table_entry *b = pb;
-
- {
- const int difftable = a->table - b->table;
- if (difftable)
- return difftable;
- }
+ const struct table_entry *a = a_;
+ const struct table_entry *b = b_;
+
+ if (a->table > b->table)
+ return 1;
+ else if (a->table < b->table)
+ return -1;
{
const struct crosstab *x = xtab[a->table];
\f
/* Post-data reading calculations. */
-static struct table_entry **find_pivot_extent (struct table_entry **, int *cnt, int pivot);
-static void enum_var_values (struct table_entry **beg, int cnt,
- union value **values, int *nvalues,
- int var_index);
+static struct table_entry **find_pivot_extent (struct table_entry **,
+ int *cnt, int pivot);
+static void enum_var_values (struct table_entry **entries, int entry_cnt,
+ int var_idx,
+ union value **values, int *value_cnt);
static void output_pivot_table (struct table_entry **, struct table_entry **,
double **, double **, double **,
int *, int *, int *);
struct table_entry *cmp;
x = xtab[(*pb)->table];
- enum_var_values (pb, pe - pb, &cols, &n_cols, COL_VAR);
+ enum_var_values (pb, pe - pb, COL_VAR, &cols, &n_cols);
nvar = cmd.pivot == CRS_PIVOT ? x->nvar : 2;
break;
/* Find all the row variable values. */
- enum_var_values (tb, te - tb, &rows, &n_rows, ROW_VAR);
+ enum_var_values (tb, te - tb, ROW_VAR, &rows, &n_rows);
/* Allocate memory space for the column and row totals. */
if (n_rows > *maxrows)
/* Integer mode correspondent to find_pivot_extent_general(). This
could be optimized somewhat, but I just don't give a crap about
- CROSSTABS performance in integer mode, which is just a wart on
- CROSSTABS' ass as far as I'm concerned.
+ CROSSTABS performance in integer mode, which is just a
+ CROSSTABS wart as far as I'm concerned.
That said, feel free to send optimization patches to me. */
static struct table_entry **
return tp;
}
-/* Compare value * A and B, where WIDTH is the string width or 0 for
- numerics, and return a strcmp()-type result. */
+/* Compares `union value's A_ and B_ and returns a strcmp()-like
+ result. WIDTH_ points to an int which is either 0 for a
+ numeric value or a string width for a string value. */
static int
-compare_value (const void *pa, const void *pb, void *pwidth)
+compare_value (const void *a_, const void *b_, void *width_)
{
- const union value *a = pa;
- const union value *b = pb;
- const int width = (int) pwidth;
+ const union value *a = a_;
+ const union value *b = b_;
+ const int *pwidth = width_;
+ const int width = *pwidth;
- if (width)
- return strncmp (a->s, b->s, width);
+ if (width == 0)
+ return (a->f < b->f) ? -1 : (a->f > b->f);
else
- return a->f < b->f ? -1 : (a->f > b->f ? 1 : 0);
+ return strncmp (a->s, b->s, width);
}
-/* Given a list of CNT table_entry's starting at BEG, creates a list
- of *NVALUES values *VALUES of variable with index VAR_INDEX. */
+/* Given an array of ENTRY_CNT table_entry structures starting at
+ ENTRIES, creates a sorted list of the values that the variable
+ with index VAR_IDX takes on. The values are returned as a
+ malloc()'d array stored in *VALUES, with the number of values
+ stored in *VALUE_CNT.
+ */
static void
-enum_var_values (struct table_entry **beg, int cnt, union value **values, int *nvalues,
- int var_index)
+enum_var_values (struct table_entry **entries, int entry_cnt, int var_idx,
+ union value **values, int *value_cnt)
{
if (mode == GENERAL)
{
- avl_tree *tree;
-
- tree = avl_create (pl_col, compare_value,
- (void *) (xtab[(*beg)->table]->v[var_index]->width));
-
- {
- int i;
-
- for (i = 0; i < cnt; i++)
- avl_insert (tree, &beg[i]->v[var_index]);
- *values = xmalloc (sizeof **values * avl_count (tree));
- }
-
- {
- avl_traverser trav;
- union value *v;
- int i;
-
- avl_traverser_init(trav);
- i = 0;
- while (NULL != (v = avl_traverse (tree, &trav)))
- (*values)[i++] = *v;
- *nvalues = i;
- }
+ int width = xtab[(*entries)->table]->v[var_idx]->width;
+ int i;
- /* Destroy tree. */
- pool_destroy (pl_col);
- pl_col = pool_create ();
+ *values = xmalloc (sizeof **values * entry_cnt);
+ for (i = 0; i < entry_cnt; i++)
+ (*values)[i] = entries[i]->v[var_idx];
+ *value_cnt = sort_unique (*values, entry_cnt, sizeof **values,
+ compare_value, &width);
}
else
{
- struct crosstab_proc *crs = &xtab[(*beg)->table]->v[var_index]->p.crs;
+ struct crosstab_proc *crs = &xtab[(*entries)->table]->v[var_idx]->p.crs;
int i;
assert (mode == INTEGER);
*values = xmalloc (sizeof **values * crs->count);
for (i = 0; i < crs->count; i++)
(*values)[i].f = i + crs->min;
- *nvalues = crs->count;
+ *value_cnt = crs->count;
}
}
{
struct len_string s;
- char *label = get_val_lab (var, *v, 0);
+ const char *label = val_labs_find (var->val_labs, *v);
if (label)
{
tab_text (table, c, r, TAB_LEFT, label);
break;
case OP_NORMAL:
if (sp->f != SYSMIS)
- sp->f = rand_normal (sp->f);
+ sp->f *= rng_get_double_normal (pspp_rng ());
break;
case OP_UNIFORM:
if (sp->f != SYSMIS)
- sp->f = rand_uniform (sp->f);
+ sp->f *= rng_get_double (pspp_rng ());
break;
case OP_SYSMIS:
if (sp[0].f == SYSMIS || !finite (sp[0].f))
void *ext; /* Extension struct for fhuser use. */
};
-/* All the file handles in the system. */
-extern struct avl_tree *files;
-
/* Pointer to the file handle that corresponds to data in the command
file entered via BEGIN DATA/END DATA. */
extern struct file_handle *inline_file;
#include <errno.h>
#include <stdlib.h>
#include "alloc.h"
-#include "avl.h"
#include "filename.h"
#include "file-handle.h"
#include "command.h"
+#include "hash.h"
#include "lexer.h"
#include "getline.h"
#include "error.h"
#include "debug-print.h"
-avl_tree *files;
+static struct hsh_table *files;
struct file_handle *inline_file;
static void init_file_handle (struct file_handle * handle);
fp = NULL;
if (files)
- fp = avl_find (files, &handle_name_p);
+ fp = hsh_find (files, &handle_name_p);
if (fp)
{
msg (SE, _("File handle %s had already been defined to refer to "
fp->name = xstrdup (handle_name);
fp->norm_fn = fn_normalize (cmd.s_name);
fp->where.filename = fp->fn = cmd.s_name;
- avl_force_insert (files, fp);
+ hsh_force_insert (files, fp);
return CMD_SUCCESS;
strcpy (&name[1], fn);
f.name = name;
- fp = avl_find (files, &f);
+ fp = hsh_find (files, &f);
if (!fp)
{
fp = xmalloc (sizeof *fp);
fp->name = name;
fp->norm_fn = fn;
fp->where.filename = fp->fn = xstrdup (filename);
- avl_force_insert (files, fp);
+ hsh_force_insert (files, fp);
}
else
{
{
struct file_handle f, *fp;
f.name = (char *) name;
- fp = avl_find (files, &f);
+ fp = hsh_find (files, &f);
if (!fp)
msg (SE, _("File handle `%s' has not been previously declared on "
h->ext = NULL;
}
+/* Hashes the name of file handle H. */
+static unsigned
+hash_file_handle (const void *handle_, void *param unused)
+{
+ const struct file_handle *handle = handle_;
+
+ return hsh_hash_string (handle->name);
+}
+
/* Compares names of file handles A and B. */
static int
-cmp_file_handle (const void *a, const void *b, void *foo unused)
+cmp_file_handle (const void *a_, const void *b_, void *foo unused)
{
- return strcmp (((struct file_handle *) a)->name,
- ((struct file_handle *) b)->name);
+ const struct file_handle *a = a_;
+ const struct file_handle *b = b_;
+
+ return strcmp (a->name, b->name);
}
-/* Initialize the AVL tree of file handles; inserts the "inline file"
+/* Initialize the hash of file handles; inserts the "inline file"
inline_file. */
void
fh_init_files (void)
{
- /* Create AVL tree. */
- files = avl_create (NULL, cmp_file_handle, NULL);
+ /* Create hash. */
+ files = hsh_create (4, cmp_file_handle, hash_file_handle, NULL, NULL);
/* Insert inline file. */
inline_file = xmalloc (sizeof *inline_file);
inline_file->where.filename
= inline_file->fn = inline_file->norm_fn = (char *) _("<Inline File>");
inline_file->where.line_number = 0;
- avl_force_insert (files, inline_file);
+ hsh_force_insert (files, inline_file);
}
/* Parses a file handle name, which may be a filename as a string or
+++ /dev/null
-/* PSPP - computes sample statistics. -*- C -*-
- Copyright (C) 1997-9, 2000 Free Software Foundation, Inc.
- Written by Ben Pfaff <blp@gnu.org>.
-
- This program is free software; you can redistribute it and/or
- modify it under the terms of the GNU General Public License as
- published by the Free Software Foundation; either version 2 of the
- License, or (at your option) any later version.
-
- This program is distributed in the hope that it will be useful, but
- WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- General Public License for more details.
-
- You should have received a copy of the GNU General Public License
- along with this program; if not, write to the Free Software
- Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA
- 02111-1307, USA. */
-
-/* Included by frequencies.q. */
-
-#if WEIGHTING
- #define WEIGHT w
- #define FUNCNAME calc_weighting
-#else /* !WEIGHTING */
- #define WEIGHT 1.0
- #define FUNCNAME calc_no_weight
-#endif /* !WEIGHTING */
-
-static int
-FUNCNAME (struct ccase *c)
-{
- int i;
-#if WEIGHTING
- double w;
-
- w = c->data[default_dict.var[default_dict.weight_index]->fv].f;
-#endif
-
- for (i = 0; i < n_variables; i++)
- {
- struct variable *v = v_variables[i];
- union value *val = &c->data[v->fv];
- struct freq_tab *ft = &v->p.frq.tab;
-
- switch (v->p.frq.tab.mode)
- {
- case FRQM_GENERAL:
- {
- /* General mode. This declaration and initialization are
- strictly conforming: see C89 section 6.5.2.1. */
- struct freq *fp = avl_find (ft->tree, (struct freq *) val);
-
- if (fp)
- fp->c += WEIGHT;
- else
- {
- fp = pool_alloc (gen_pool, sizeof *fp);
- fp->v = *val;
- fp->c = WEIGHT;
- avl_insert (ft->tree, fp);
- if (is_missing (val, v))
- v->p.frq.tab.n_missing++;
- }
- }
- break;
- case FRQM_INTEGER:
- /* Integer mode. */
- if (val->f == SYSMIS)
- v->p.frq.tab.sysmis += WEIGHT;
- else if (val->f > INT_MIN+1 && val->f < INT_MAX-1)
- {
- int i = val->f;
- if (i >= v->p.frq.tab.min && i <= v->p.frq.tab.max)
- v->p.frq.tab.vector[i - v->p.frq.tab.min] += WEIGHT;
- }
- else
- v->p.frq.tab.out_of_range += WEIGHT;
- break;
- default:
- assert (0);
- }
- }
- return 1;
-}
-
-#undef WEIGHT
-#undef WEIGHTING
-#undef FUNCNAME
#include <math.h>
#include <stdlib.h>
#include "alloc.h"
-#include "avl.h"
#include "bitvector.h"
#include "hash.h"
#include "pool.h"
#include "command.h"
#include "lexer.h"
#include "error.h"
-#include "approx.h"
+#include "algorithm.h"
#include "magic.h"
#include "misc.h"
#include "stats.h"
#include "output.h"
#include "som.h"
+#include "str.h"
#include "tab.h"
+#include "value-labels.h"
#include "var.h"
#include "vfm.h"
-#include "str.h"
#include "debug-print.h"
};
/* Percentiles to calculate. */
-static double *percentiles=0;
-static double *percentile_values=0;
-static int n_percentiles=0;
+static double *percentiles;
+static double *percentile_values;
+static int n_percentiles;
/* Groups of statistics. */
#define BI BIT_INDEX
static void determine_charts (void);
static void precalc (void);
-static int calc_weighting (struct ccase *);
-static int calc_no_weight (struct ccase *);
+static int calc (struct ccase *);
static void postcalc (void);
static void postprocess_freq_tab (struct variable *);
static void dump_statistics (struct variable *, int show_varname);
static void cleanup_freq_tab (struct variable *);
-static int compare_value_numeric_a (const void *, const void *, void *);
-static int compare_value_alpha_a (const void *, const void *, void *);
-static int compare_value_numeric_d (const void *, const void *, void *);
-static int compare_value_alpha_d (const void *, const void *, void *);
-static int compare_freq_numeric_a (const void *, const void *, void *);
-static int compare_freq_alpha_a (const void *, const void *, void *);
-static int compare_freq_numeric_d (const void *, const void *, void *);
-static int compare_freq_alpha_d (const void *, const void *, void *);
+static hsh_hash_func hash_value_numeric, hash_value_alpha;
+static hsh_compare_func compare_value_numeric_a, compare_value_alpha_a;
+static hsh_compare_func compare_value_numeric_d, compare_value_alpha_d;
+static hsh_compare_func compare_freq_numeric_a, compare_freq_alpha_a;
+static hsh_compare_func compare_freq_numeric_d, compare_freq_alpha_d;
\f
/* Parser and outline. */
static int
internal_cmd_frequencies (void)
{
- int (*calc) (struct ccase *);
int i;
n_percentiles = 0;
+ percentile_values = NULL;
percentiles = NULL;
n_variables = 0;
/* Do it! */
update_weighting (&default_dict);
- calc = default_dict.weight_index == -1 ? calc_no_weight : calc_weighting;
procedure (precalc, calc, postcalc);
return CMD_SUCCESS;
}
}
-/* Generate each calc_*(). */
-#define WEIGHTING 0
-#include "frequencies.g"
+/* Add data from case C to the frequency table. */
+static int
+calc (struct ccase *c)
+{
+ double weight;
+ int i;
-#define WEIGHTING 1
-#include "frequencies.g"
+ if (default_dict.weight_index == -1)
+ weight = 1.0;
+ else
+ weight = c->data[default_dict.var[default_dict.weight_index]->fv].f;
+
+ for (i = 0; i < n_variables; i++)
+ {
+ struct variable *v = v_variables[i];
+ union value *val = &c->data[v->fv];
+ struct freq_tab *ft = &v->p.frq.tab;
+
+ switch (v->p.frq.tab.mode)
+ {
+ case FRQM_GENERAL:
+ {
+ /* General mode. */
+ struct freq **fpp = (struct freq **) hsh_probe (ft->data, val);
+
+ if (*fpp != NULL)
+ (*fpp)->c += weight;
+ else
+ {
+ struct freq *fp = *fpp = pool_alloc (gen_pool, sizeof *fp);
+ fp->v = *val;
+ fp->c = weight;
+ }
+ }
+ break;
+ case FRQM_INTEGER:
+ /* Integer mode. */
+ if (val->f == SYSMIS)
+ v->p.frq.tab.sysmis += weight;
+ else if (val->f > INT_MIN+1 && val->f < INT_MAX-1)
+ {
+ int i = val->f;
+ if (i >= v->p.frq.tab.min && i <= v->p.frq.tab.max)
+ v->p.frq.tab.vector[i - v->p.frq.tab.min] += weight;
+ }
+ else
+ v->p.frq.tab.out_of_range += weight;
+ break;
+ default:
+ assert (0);
+ }
+ }
+ return 1;
+}
/* Prepares each variable that is the target of FREQUENCIES by setting
up its hash table. */
if (v->p.frq.tab.mode == FRQM_GENERAL)
{
- avl_comparison_func compare;
- if (v->type == NUMERIC)
- compare = compare_value_numeric_a;
- else
- compare = compare_value_alpha_a;
- v->p.frq.tab.tree = avl_create (gen_pool, compare,
- (void *) v->width);
- v->p.frq.tab.n_missing = 0;
+ hsh_hash_func *hash;
+ hsh_compare_func *compare;
+
+ if (v->type == NUMERIC)
+ {
+ hash = hash_value_numeric;
+ compare = compare_value_numeric_a;
+ }
+ else
+ {
+ hash = hash_value_alpha;
+ compare = compare_value_alpha_a;
+ }
+ v->p.frq.tab.data = hsh_create (16, compare, hash, NULL, v);
}
else
{
}
}
-/* Comparison function called by comparison_helper(). */
-static avl_comparison_func comparison_func;
-
-/* Passed to comparison function by comparison_helper(). */
-static void *comparison_param;
-
-/* Used by postprocess_freq_tab to re-sort frequency tables. */
-static int
-comparison_helper (const void *a, const void *b)
+/* Returns the comparison function that should be used for
+ sorting a frequency table by FRQ_SORT using VAR_TYPE
+ variables. */
+static hsh_compare_func *
+get_freq_comparator (int frq_sort, int var_type)
{
- return comparison_func (&((struct freq *) a)->v,
- &((struct freq *) b)->v, comparison_param);
+ /* Note that q2c generates tags beginning with 1000. */
+ switch (frq_sort | (var_type << 16))
+ {
+ case FRQ_AVALUE | (NUMERIC << 16): return compare_value_numeric_a;
+ case FRQ_AVALUE | (ALPHA << 16): return compare_value_alpha_a;
+ case FRQ_DVALUE | (NUMERIC << 16): return compare_value_numeric_d;
+ case FRQ_DVALUE | (ALPHA << 16): return compare_value_alpha_d;
+ case FRQ_AFREQ | (NUMERIC << 16): return compare_freq_numeric_a;
+ case FRQ_AFREQ | (ALPHA << 16): return compare_freq_alpha_a;
+ case FRQ_DFREQ | (NUMERIC << 16): return compare_freq_numeric_d;
+ case FRQ_DFREQ | (ALPHA << 16): return compare_freq_alpha_d;
+ default: assert (0);
+ }
}
-/* Used by postprocess_freq_tab to construct the array members valid,
- missing of freq_tab. */
-static void
-add_freq (void *data, void *param)
+static int
+not_missing (const void *f_, void *v_)
{
- struct freq *f = data;
- struct variable *v = param;
+ const struct freq *f = f_;
+ struct variable *v = v_;
- v->p.frq.tab.total_cases += f->c;
-
- if ((v->type == NUMERIC && f->v.f == SYSMIS)
- || (cmd.miss == FRQ_EXCLUDE && is_user_missing (&f->v, v)))
- {
- *v->p.frq.tab.missing++ = *f;
- v->p.frq.tab.valid_cases -= f->c;
- }
- else
- *v->p.frq.tab.valid++ = *f;
+ return !is_missing (&f->v, v);
}
static void
postprocess_freq_tab (struct variable * v)
{
- avl_comparison_func compare;
-
- switch (cmd.sort | (v->type << 16))
+ hsh_compare_func *compare;
+ struct freq_tab *ft;
+ size_t count;
+ void **data;
+ struct freq *freqs, *f;
+ size_t i;
+
+ assert (v->p.frq.tab.mode == FRQM_GENERAL);
+ compare = get_freq_comparator (cmd.sort, v->type);
+ ft = &v->p.frq.tab;
+
+ /* Extract data from hash table. */
+ count = hsh_count (ft->data);
+ data = hsh_data (ft->data);
+
+ /* Copy dereferenced data into freqs. */
+ freqs = xmalloc (count* sizeof *freqs);
+ for (i = 0; i < count; i++)
{
- /* Note that q2c generates tags beginning with 1000. */
- case FRQ_AVALUE | (NUMERIC << 16):
- compare = NULL;
- break;
- case FRQ_AVALUE | (ALPHA << 16):
- compare = NULL;
- break;
- case FRQ_DVALUE | (NUMERIC << 16):
- comparison_func = compare_value_numeric_d;
- break;
- case FRQ_DVALUE | (ALPHA << 16):
- compare = compare_value_alpha_d;
- break;
- case FRQ_AFREQ | (NUMERIC << 16):
- compare = compare_freq_numeric_a;
- break;
- case FRQ_AFREQ | (ALPHA << 16):
- compare = compare_freq_alpha_a;
- break;
- case FRQ_DFREQ | (NUMERIC << 16):
- compare = compare_freq_numeric_d;
- break;
- case FRQ_DFREQ | (ALPHA << 16):
- compare = compare_freq_alpha_d;
- break;
- default:
- assert (0);
+ struct freq *f = data[i];
+ freqs[i] = *f;
}
- comparison_func = compare;
-
- if (v->p.frq.tab.mode == FRQM_GENERAL)
- {
- int total;
- struct freq_tab *ft = &v->p.frq.tab;
- total = avl_count (ft->tree);
- ft->n_valid = total - ft->n_missing;
- ft->valid = xmalloc (sizeof (struct freq) * total);
- ft->missing = &ft->valid[ft->n_valid];
- ft->valid_cases = ft->total_cases = 0.0;
+ /* Put data into ft. */
+ ft->valid = freqs;
+ ft->n_valid = partition (freqs, count, sizeof *freqs, not_missing, v);
+ ft->missing = freqs + ft->n_valid;
+ ft->n_missing = count - ft->n_valid;
- avl_walk (ft->tree, add_freq, (void *) v);
+ /* Sort data. */
+ sort (ft->valid, ft->n_valid, sizeof *ft->valid, compare, v);
+ sort (ft->missing, ft->n_missing, sizeof *ft->missing, compare, v);
- ft->valid -= ft->n_valid;
- ft->missing -= ft->n_missing;
- ft->valid_cases += ft->total_cases;
+ /* Summary statistics. total_cases must count every case, so the
+ missing entries are added to it first; the loop over the valid
+ entries then adds to both total_cases and valid_cases. */
+ ft->total_cases = ft->valid_cases = 0.0;
+ for (f = ft->missing; f < ft->missing + ft->n_missing; f++)
+ ft->total_cases += f->c;
+ for (f = ft->valid; f < ft->valid + ft->n_valid; f++)
+ {
+ ft->total_cases += f->c;
- if (compare)
- {
- qsort (ft->valid, ft->n_valid, sizeof (struct freq), comparison_helper);
- qsort (ft->missing, ft->n_missing, sizeof (struct freq), comparison_helper);
- }
+ if ((v->type != NUMERIC || f->v.f != SYSMIS)
+ && (cmd.miss != FRQ_EXCLUDE || !is_user_missing (&f->v, v)))
+ ft->valid_cases += f->c;
}
- else
- assert (0);
}
static void
-cleanup_freq_tab (struct variable * v)
+cleanup_freq_tab (struct variable *v)
{
- if (v->p.frq.tab.mode == FRQM_GENERAL)
- {
- struct freq_tab *ft = &v->p.frq.tab;
-
- free (ft->valid);
- }
- else
- assert (0);
+ assert (v->p.frq.tab.mode == FRQM_GENERAL);
+ free (v->p.frq.tab.valid);
}
/* Parses the VARIABLES subcommand, adding to
break;
if (i >= n_percentiles || tokval != percentiles[i])
{
- percentiles = pool_realloc (int_pool, percentiles,
- (n_percentiles + 1) * sizeof (double));
- percentile_values = pool_realloc (int_pool, percentile_values,
- (n_percentiles + 1) * sizeof (double));
-
- if (i < n_percentiles)
- {
- memmove (&percentiles[i + 1], &percentiles[i],
- (n_percentiles - i) * sizeof (double));
- memmove (&percentile_values[i + 1], &percentile_values[i],
- (n_percentiles - i) * sizeof (double));
-
- }
+ percentiles
+ = pool_realloc (int_pool, percentiles,
+ (n_percentiles + 1) * sizeof *percentiles);
+ percentile_values
+ = pool_realloc (int_pool, percentile_values,
+ (n_percentiles + 1) * sizeof *percentile_values);
+ if (i < n_percentiles)
+ {
+ memmove (&percentiles[i + 1], &percentiles[i],
+ (n_percentiles - i) * sizeof *percentiles);
+ memmove (&percentile_values[i + 1], &percentile_values[i],
+ (n_percentiles - i) * sizeof *percentile_values);
+ }
percentiles[i] = x;
n_percentiles++;
}
\f
/* Comparison functions. */
+/* Hash of numeric values. */
+static unsigned
+hash_value_numeric (const void *value_, void *foo unused)
+{
+ const struct freq *value = value_;
+ return hsh_hash_double (value->v.f);
+}
+
+/* Hash of string values. V_ is the auxiliary pointer given to
+ hsh_create() when the frequency table is built, i.e. the
+ `struct variable' being tabulated (the same object that
+ compare_value_alpha_a() receives), so the string width is read
+ from it rather than by treating the pointer as an int. */
+static unsigned
+hash_value_alpha (const void *value_, void *v_)
+{
+ const struct freq *value = value_;
+ const struct variable *v = v_;
+
+ return hsh_hash_bytes (value->v.s, v->width);
+}
+
/* Ascending numeric compare of values. */
static int
-compare_value_numeric_a (const void *a, const void *b, void *foo unused)
+compare_value_numeric_a (const void *a_, const void *b_, void *foo unused)
{
- return approx_compare (((struct freq *) a)->v.f, ((struct freq *) b)->v.f);
+ const struct freq *a = a_;
+ const struct freq *b = b_;
+
+ if (a->v.f > b->v.f)
+ return 1;
+ else if (a->v.f < b->v.f)
+ return -1;
+ else
+ return 0;
}
/* Ascending string compare of values. */
static int
-compare_value_alpha_a (const void *a, const void *b, void *len)
+compare_value_alpha_a (const void *a_, const void *b_, void *v_)
{
- return memcmp (((struct freq *) a)->v.s, ((struct freq *) b)->v.s, (int) len);
+ const struct freq *a = a_;
+ const struct freq *b = b_;
+ const struct variable *v = v_;
+
+ return memcmp (a->v.s, b->v.s, v->width);
}
/* Descending numeric compare of values. */
static int
compare_value_numeric_d (const void *a, const void *b, void *foo unused)
{
- return approx_compare (((struct freq *) b)->v.f, ((struct freq *) a)->v.f);
+ return -compare_value_numeric_a (a, b, foo);
}
/* Descending string compare of values. */
static int
-compare_value_alpha_d (const void *a, const void *b, void *len)
+compare_value_alpha_d (const void *a, const void *b, void *v)
{
- return memcmp (((struct freq *) b)->v.s, ((struct freq *) a)->v.s, (int) len);
+ return -compare_value_alpha_a (a, b, v);
}
/* Ascending numeric compare of frequency;
secondary key on ascending numeric value. */
static int
-compare_freq_numeric_a (const void *a, const void *b, void *foo unused)
+compare_freq_numeric_a (const void *a_, const void *b_, void *foo unused)
{
- int x = approx_compare (((struct freq *) a)->c, ((struct freq *) b)->c);
- return x ? x : approx_compare (((struct freq *) a)->v.f, ((struct freq *) b)->v.f);
+ const struct freq *a = a_;
+ const struct freq *b = b_;
+
+ /* Primary key: the frequency count, which is the `c' member of
+ struct freq itself, not a member of the value `v'. */
+ if (a->c > b->c)
+ return 1;
+ else if (a->c < b->c)
+ return -1;
+
+ /* Secondary key: ascending numeric value. */
+ if (a->v.f > b->v.f)
+ return 1;
+ else if (a->v.f < b->v.f)
+ return -1;
+ else
+ return 0;
}
/* Ascending numeric compare of frequency;
secondary key on ascending string value. */
static int
-compare_freq_alpha_a (const void *a, const void *b, void *len)
+compare_freq_alpha_a (const void *a_, const void *b_, void *v_)
{
- int x = approx_compare (((struct freq *) a)->c, ((struct freq *) b)->c);
- return x ? x : memcmp (((struct freq *) a)->v.s, ((struct freq *) b)->v.s, (int) len);
+ const struct freq *a = a_;
+ const struct freq *b = b_;
+ const struct variable *v = v_;
+
+ /* Primary key: the frequency count, which is the `c' member of
+ struct freq itself, not a member of the value `v'. */
+ if (a->c > b->c)
+ return 1;
+ else if (a->c < b->c)
+ return -1;
+ else
+ /* Secondary key: ascending string value, V->width bytes wide. */
+ return memcmp (a->v.s, b->v.s, v->width);
}
/* Descending numeric compare of frequency;
secondary key on ascending numeric value. */
static int
-compare_freq_numeric_d (const void *a, const void *b, void *foo unused)
+compare_freq_numeric_d (const void *a_, const void *b_, void *foo unused)
{
- int x = approx_compare (((struct freq *) b)->c, ((struct freq *) a)->c);
- return x ? x : approx_compare (((struct freq *) a)->v.f, ((struct freq *) b)->v.f);
+ const struct freq *a = a_;
+ const struct freq *b = b_;
+
+ /* Primary key: the frequency count (`c' member of struct freq,
+ not of the value `v'), in descending order. */
+ if (a->c > b->c)
+ return -1;
+ else if (a->c < b->c)
+ return 1;
+
+ /* Secondary key: ascending numeric value. */
+ if (a->v.f > b->v.f)
+ return 1;
+ else if (a->v.f < b->v.f)
+ return -1;
+ else
+ return 0;
}
/* Descending numeric compare of frequency;
secondary key on ascending string value. */
static int
-compare_freq_alpha_d (const void *a, const void *b, void *len)
+compare_freq_alpha_d (const void *a_, const void *b_, void *v_)
{
- int x = approx_compare (((struct freq *) b)->c, ((struct freq *) a)->c);
- return x ? x : memcmp (((struct freq *) a)->v.s, ((struct freq *) b)->v.s, (int) len);
+ const struct freq *a = a_;
+ const struct freq *b = b_;
+ const struct variable *v = v_;
+
+ /* Primary key: the frequency count (`c' member of struct freq,
+ not of the value `v'), in descending order. */
+ if (a->c > b->c)
+ return -1;
+ else if (a->c < b->c)
+ return 1;
+ else
+ /* Secondary key: ascending string value, V->width bytes wide. */
+ return memcmp (a->v.s, b->v.s, v->width);
}
\f
/* Frequency table display. */
if (lab)
{
- char *label = get_val_lab (v, f->v, 0);
+ const char *label = val_labs_find (v->val_labs, f->v);
if (label != NULL)
tab_text (t, 0, r, TAB_LEFT, label);
}
if (lab)
{
- char *label = get_val_lab (v, f->v, 0);
+ const char *label = val_labs_find (v->val_labs, f->v);
if (label != NULL)
tab_text (t, 0, r, TAB_LEFT, label);
}
struct freq *f;
int most_often;
- double cum_percent=0;
- int i=0;
- double previous_value=SYSMIS;
+ double cum_percent;
+ int i = 0;
+ double previous_value;
+ /* Calculate the mean. */
+ X_bar = 0.0;
+ for (f = v->p.frq.tab.valid; f < v->p.frq.tab.missing; f++)
+ X_bar += f->v.f * f->c;
+ X_bar /= W;
+ /* Calculate percentiles. */
+ cum_percent = 0;
+ previous_value = SYSMIS;
+ for (f = v->p.frq.tab.valid; f < v->p.frq.tab.missing; f++)
+ {
+ cum_percent += f->c / v->p.frq.tab.valid_cases;
+ for (; i < n_percentiles; i++)
+ {
+ if (cum_percent <= percentiles[i])
+ break;
+
+ percentile_values[i] = previous_value;
+ }
+ previous_value = f->v.f;
+ }
- /* Calculate the mean and mode */
- X_bar = 0.0;
+ /* Calculate the mode. */
most_often = -1;
X_mode = SYSMIS;
for (f = v->p.frq.tab.valid; f < v->p.frq.tab.missing; f++)
{
-
-
- cum_percent += f->c / v->p.frq.tab.valid_cases ;
-
-
- for(;i < n_percentiles ; ++i)
- {
-
-
- if (cum_percent <= percentiles[i])
- break;
-
- percentile_values[i]=previous_value;
-
- }
-
-
- /* mean */
- X_bar += f->v.f * f->c;
-
- /* mode */
- if (most_often < f->c )
- {
- most_often=f->c;
- X_mode= f->v.f;
- }
- else if ( most_often == f->c )
- {
- /* if there are 2 values , then mode is undefined */
- X_mode=SYSMIS;
- }
-
- previous_value=f->v.f;
+ if (most_often < f->c)
+ {
+ most_often = f->c;
+ X_mode = f->v.f;
+ }
+ else if (most_often == f->c)
+ {
+ /* A duplicate mode is undefined.
+ FIXME: keep track of *all* the modes. */
+ X_mode = SYSMIS;
+ }
}
- X_bar /= W;
/* Calculate moments about the mean. */
M2 = M3 = M4 = 0.0;
/* Formulas below are taken from _SPSS Statistical Algorithms_. */
d[frq_min] = v->p.frq.tab.valid[0].v.f;
- d[frq_max] = v->p.frq.tab.missing[-1].v.f;
+ d[frq_max] = v->p.frq.tab.valid[v->p.frq.tab.n_valid - 1].v.f;
d[frq_mode] = X_mode;
d[frq_range] = d[frq_max] - d[frq_min];
d[frq_median] = SYSMIS;
r++;
}
- for ( i=0 ; i < n_percentiles ; ++i,++r ) {
- struct string ds;
-
- ds_init(gen_pool, &ds, 20 );
-
- ds_printf(&ds,"%s %d",_("Percentile"),(int)(percentiles[i]*100));
+ for (i = 0; i < n_percentiles; i++, r++)
+ {
+ struct string ds;
+ ds_init (gen_pool, &ds, 20);
+ ds_printf (&ds, "%s %d", _("Percentile"), (int) (percentiles[i] * 100));
- tab_text(t,0,r, TAB_LEFT | TAT_TITLE, ds.string);
- tab_float(t,1,r,TAB_NONE,percentile_values[i],11,3);
+ tab_text (t, 0, r, TAB_LEFT | TAT_TITLE, ds.string);
+ tab_float (t, 1, r, TAB_NONE, percentile_values[i], 11, 3);
- ds_destroy(&ds);
- }
+ ds_destroy (&ds);
+ }
tab_columns (t, SOM_COL_DOWN, 1);
if (show_varname)
tab_submit (t);
}
-\f
-#if 0
-/* Statistical calculation. */
-
-static int degree[6];
-static int maxdegree, minmax;
-
-static void stat_func (struct freq *, VISIT, int);
-static void calc_stats (int);
-static void display_stats (int);
-
-/* mapping of data[]:
- * 0=>8
- * 1=>9
- * 2=>10
- * index 3: number of modes found (detects multiple modes)
- * index 4: number of nodes processed, for calculation of median
- * 5=>11
- *
- * mapping of dbl[]:
- * index 0-3: sum of X**i
- * index 4: minimum
- * index 5: maximum
- * index 6: mode
- * index 7: median
- * index 8: number of cases, valid and missing
- * index 9: number of valid cases
- * index 10: maximum frequency found, for calculation of mode
- * index 11: maximum frequency
- */
-static void
-out_stats (int i)
-{
- int j;
-
- if (cur_var->type == ALPHA)
- return;
- for (j = 0; j < 8; j++)
- cur_var->dbl[j] = 0.;
- cur_var->dbl[10] = 0;
- cur_var->dbl[4] = DBL_MAX;
- cur_var->dbl[5] = -DBL_MAX;
- for (j = 2; j < 5; j++)
- cur_var->data[j] = 0;
- cur_var->p.frq.median_ncases = cur_var->p.frq.t.valid_cases / 2;
- avlwalk (cur_var->p.frq.t.f, stat_func, LEFT_TO_RIGHT);
- calc_stats (i);
- display_stats (i);
-}
-
-static void
-calc_stats (int i)
-{
- struct variable *v;
- double n;
- double *d;
-
- v = v_variables[i];
- n = v->p.frq.t.valid_cases;
- d = v->dbl;
-
- if (n < 2 || (n < 3 && stat[FRQ_ST_7]))
- {
- warn (_("only %g case%s for variable %s, statistics not "
- "computed"), n, n == 1 ? "" : "s", v->name);
- return;
- }
- if (stat[FRQ_ST_9])
- v->res[FRQ_ST_9] = d[5] - d[4];
- if (stat[FRQ_ST_10])
- v->res[FRQ_ST_10] = d[4];
- if (stat[FRQ_ST_11])
- v->res[FRQ_ST_11] = d[5];
- if (stat[FRQ_ST_12])
- v->res[FRQ_ST_12] = d[0];
- if (stat[FRQ_ST_1] || stat[FRQ_ST_2] || stat[FRQ_ST_5] || stat[FRQ_ST_6] || stat[FRQ_ST_7])
- {
- v->res[FRQ_ST_1] = calc_mean (d, n);
- v->res[FRQ_ST_6] = calc_variance (d, n);
- }
- if (stat[FRQ_ST_2] || stat[FRQ_ST_5] || stat[FRQ_ST_7])
- v->res[FRQ_ST_5] = calc_stddev (v->res[FRQ_ST_6]);
- if (stat[FRQ_ST_2])
- v->res[FRQ_ST_2] = calc_semean (v->res[FRQ_ST_5], n);
- if (stat[FRQ_ST_7])
- {
- v->res[FRQ_ST_7] = calc_kurt (d, n, v->res[FRQ_ST_6]);
- v->res[FRQ_ST_14] = calc_sekurt (n);
- }
- if (stat[FRQ_ST_8])
- {
- v->res[FRQ_ST_8] = calc_skew (d, n, v->res[FRQ_ST_5]);
- v->res[FRQ_ST_15] = calc_seskew (n);
- }
- if (stat[FRQ_ST_MODE])
- {
- v->res[FRQ_ST_MODE] = v->dbl[6];
- if (v->data[3] > 1)
- warn (_("The variable %s has %d modes. The lowest of these "
- "is the one given in the table."), v->name, v->data[3]);
- }
- if (stat[FRQ_ST_MEDIAN])
- v->res[FRQ_ST_MEDIAN] = v->dbl[7];
-}
-
-static void
-stat_func (struct freq * x, VISIT order, int param)
-{
- double d, f;
-
- if (order != INORDER)
- return;
- f = d = x->v.f;
- cur_var->dbl[0] += (d * x->c);
- switch (maxdegree)
- {
- case 1:
- f *= d;
- cur_var->dbl[1] += (f * x->c);
- break;
- case 2:
- f *= d;
- cur_var->dbl[1] += (f * x->c);
- f *= d;
- cur_var->dbl[2] += (f * x->c);
- break;
- case 3:
- f *= d;
- cur_var->dbl[1] += (f * x->c);
- f *= d;
- cur_var->dbl[2] += (f * x->c);
- f *= d;
- cur_var->dbl[3] += (f * x->c);
- break;
- }
- if (minmax)
- {
- if (d < cur_var->dbl[4])
- cur_var->dbl[4] = d;
- if (d > cur_var->dbl[5])
- cur_var->dbl[5] = d;
- }
- if (x->c > cur_var->dbl[10])
- {
- cur_var->data[3] = 1;
- cur_var->dbl[10] = x->c;
- cur_var->dbl[6] = x->v.f;
- }
- else if (x->c == cur_var->dbl[10])
- cur_var->data[3]++;
- if (cur_var->data[4] < cur_var->p.frq.median_ncases
- && cur_var->data[4] + x->c >= cur_var->p.frq.median_ncases)
- cur_var->dbl[7] = x->v.f;
- cur_var->data[4] += x->c;
-}
-\f
-/* Statistical display. */
-static int column, ncolumns;
-
-static void outstat (char *, double);
-
-static void
-display_stats (int i)
-{
- statname *sp;
- struct variable *v;
- int nlines;
-
- v = v_variables[i];
- ncolumns = (margin_width + 3) / 26;
- if (ncolumns < 1)
- ncolumns = 1;
- nlines = sc / ncolumns + (sc % ncolumns > 0);
- if (nlines == 2 && sc == 4)
- ncolumns = 2;
- if (nlines == 3 && sc == 9)
- ncolumns = 3;
- if (nlines == 4 && sc == 12)
- ncolumns = 3;
- column = 0;
- for (sp = st_name; sp->s != -1; sp++)
- if (stat[sp->s] == 1)
- outstat (gettext (sp->s10), v->res[sp->s]);
- if (column)
- out_eol ();
- blank_line ();
-}
-
-static void
-outstat (char *label, double value)
-{
- char buf[128], *cp;
- int dw, n;
-
- cp = &buf[0];
- if (!column)
- out_header ();
- else
- {
- memset (buf, ' ', 3);
- cp = &buf[3];
- }
- dw = 4;
- n = nsprintf (cp, "%-10s %12.4f", label, value);
- while (n > 23 && dw > 0)
- n = nsprintf (cp, "%-10s %12.*f", label, --dw, value);
- outs (buf);
- column++;
- if (column == ncolumns)
- {
- column = 0;
- out_eol ();
- }
-}
-\f
-/* Graphs. */
-
-static rect pb, gb; /* Page border, graph border. */
-static int px, py; /* Page width, height. */
-static int ix, iy; /* Inch width, height. */
-
-static void draw_bar_chart (int);
-static void draw_histogram (int);
-static int scale_dep_axis (int);
-
-static void
-out_graphs (int i)
-{
- struct variable *v;
-
- v = v_variables[i];
- if (avlcount (cur_var->p.frq.t.f) < 2
- || (chart == HIST && v_variables[i]->type == ALPHA))
- return;
- if (driver_id && set_highres == 1)
- {
- char *text;
-
- graf_page_size (&px, &py, &ix, &iy);
- graf_feed_page ();
-
- /* Calculate borders. */
- pb.x1 = ix;
- pb.y1 = iy;
- pb.x2 = px - ix;
- pb.y2 = py - iy;
- gb.x1 = pb.x1 + ix;
- gb.y1 = pb.y1 + iy;
- gb.x2 = pb.x2 - ix / 2;
- gb.y2 = pb.y2 - iy;
-
- /* Draw borders. */
- graf_frame_rect (COMPONENTS (pb));
- graf_frame_rect (COMPONENTS (gb));
-
- /* Draw axis labels. */
- graf_font_size (iy / 4); /* 18-point text */
- text = format == PERCENT ? _("Percentage") : _("Frequency");
- graf_text (pb.x1 + max (ix, iy) / 4 + max (ix, iy) / 16, gb.y2, text,
- SIDEWAYS);
- text = v->label ? v->label : v->name;
- graf_text (gb.x1, pb.y2 - iy / 4, text, UPRIGHT);
-
- /* Draw axes, chart proper. */
- if (chart == BAR ||
- (chart == HBAR
- && (avlcount (cur_var->p.frq.t.f) || v_variables[i]->type == ALPHA)))
- draw_bar_chart (i);
- else
- draw_histogram (i);
-
- graf_eject_page ();
- }
- if (set_lowres == 1 || (set_lowres == 2 && (!driver_id || !set_highres)))
- {
- static warned;
-
- /* Do character-based graphs. */
- if (!warned)
- {
- warn (_("low-res graphs not implemented"));
- warned = 1;
- }
- }
-}
-
-#if __GNUC__ && !__CHECKER__
-#define BIG_TYPE long long
-#else /* !__GNUC__ */
-#define BIG_TYPE double
-#endif /* !__GNUC__ */
-
-static void
-draw_bar_chart (int i)
-{
- int bar_width, bar_spacing;
- int w, max, row;
- double val;
- struct freq *f;
- rect r;
- AVLtraverser *t = NULL;
-
- w = (px - ix * 7 / 2) / avlcount (cur_var->p.frq.t.f);
- bar_width = w * 2 / 3;
- bar_spacing = w - bar_width;
-
-#if !ALLOW_HUGE_BARS
- if (bar_width > ix / 2)
- bar_width = ix / 2;
-#endif /* !ALLOW_HUGE_BARS */
-
- max = scale_dep_axis (cur_var->p.frq.t.max_freq);
-
- row = 0;
- r.x1 = gb.x1 + bar_spacing / 2;
- r.x2 = r.x1 + bar_width;
- r.y2 = gb.y2;
- graf_fill_color (255, 0, 0);
- for (f = avltrav (cur_var->p.frq.t.f, &t); f;
- f = avltrav (cur_var->p.frq.t.f, &t))
- {
- char buf2[64];
- char *buf;
-
- val = f->c;
- if (format == PERCENT)
- val = val * 100 / cur_var->p.frq.t.valid_cases;
- r.y1 = r.y2 - val * (height (gb) - 1) / max;
- graf_fill_rect (COMPONENTS (r));
- graf_frame_rect (COMPONENTS (r));
- buf = get_val_lab (cur_var, f->v, 0);
- if (!buf)
- if (cur_var->type == ALPHA)
- buf = f->v.s;
- else
- {
- sprintf (buf2, "%g", f->v.f);
- buf = buf2;
- }
- graf_text (r.x1 + bar_width / 2,
- gb.y2 + iy / 32 + row * iy / 9, buf, TCJUST);
- row ^= 1;
- r.x1 += bar_width + bar_spacing;
- r.x2 += bar_width + bar_spacing;
- }
- graf_fill_color (0, 0, 0);
-}
-
-#define round_down(X, V) \
- (floor ((X) / (V)) * (V))
-#define round_up(X, V) \
- (ceil ((X) / (V)) * (V))
-
-static void
-draw_histogram (int i)
-{
- double lower, upper, interval;
- int bars[MAX_HIST_BARS + 1], top, j;
- int err, addend, rem, nbars, row, max_freq;
- char buf[25];
- rect r;
- struct freq *f;
- AVLtraverser *t = NULL;
-
- lower = min == SYSMIS ? cur_var->dbl[4] : min;
- upper = max == SYSMIS ? cur_var->dbl[5] : max;
- if (upper - lower >= 10)
- {
- double l, u;
-
- u = round_up (upper, 5);
- l = round_down (lower, 5);
- nbars = (u - l) / 5;
- if (nbars * 2 + 1 <= MAX_HIST_BARS)
- {
- nbars *= 2;
- u = round_up (upper, 2.5);
- l = round_down (lower, 2.5);
- if (l + 1.25 <= lower && u - 1.25 >= upper)
- nbars--, lower = l + 1.25, upper = u - 1.25;
- else if (l + 1.25 <= lower)
- lower = l + 1.25, upper = u + 1.25;
- else if (u - 1.25 >= upper)
- lower = l - 1.25, upper = u - 1.25;
- else
- nbars++, lower = l - 1.25, upper = u + 1.25;
- }
- else if (nbars < MAX_HIST_BARS)
- {
- if (l + 2.5 <= lower && u - 2.5 >= upper)
- nbars--, lower = l + 2.5, upper = u - 2.5;
- else if (l + 2.5 <= lower)
- lower = l + 2.5, upper = u + 2.5;
- else if (u - 2.5 >= upper)
- lower = l - 2.5, upper = u - 2.5;
- else
- nbars++, lower = l - 2.5, upper = u + 2.5;
- }
- else
- nbars = MAX_HIST_BARS;
- }
- else
- {
- nbars = avlcount (cur_var->p.frq.t.f);
- if (nbars > MAX_HIST_BARS)
- nbars = MAX_HIST_BARS;
- }
- if (nbars < MIN_HIST_BARS)
- nbars = MIN_HIST_BARS;
- interval = (upper - lower) / nbars;
-
- memset (bars, 0, sizeof (int[nbars + 1]));
- if (lower >= upper)
- {
- msg (SE, _("Could not make histogram for %s for specified "
- "minimum %g and maximum %g; please discard graph."), cur_var->name,
- lower, upper);
- return;
- }
- for (f = avltrav (cur_var->p.frq.t.f, &t); f;
- f = avltrav (cur_var->p.frq.t.f, &t))
- if (f->v.f == upper)
- bars[nbars - 1] += f->c;
- else if (f->v.f >= lower && f->v.f < upper)
- bars[(int) ((f->v.f - lower) / interval)] += f->c;
- bars[nbars - 1] += bars[nbars];
- for (j = top = 0; j < nbars; j++)
- if (bars[j] > top)
- top = bars[j];
- max_freq = top;
- top = scale_dep_axis (top);
-
- err = row = 0;
- addend = width (gb) / nbars;
- rem = width (gb) % nbars;
- r.x1 = gb.x1;
- r.x2 = r.x1 + addend;
- r.y2 = gb.y2;
- err += rem;
- graf_fill_color (255, 0, 0);
- for (j = 0; j < nbars; j++)
- {
- int w;
-
- r.y1 = r.y2 - (BIG_TYPE) bars[j] * (height (gb) - 1) / top;
- graf_fill_rect (COMPONENTS (r));
- graf_frame_rect (COMPONENTS (r));
- sprintf (buf, "%g", lower + interval / 2 + interval * j);
- graf_text (r.x1 + addend / 2,
- gb.y2 + iy / 32 + row * iy / 9, buf, TCJUST);
- row ^= 1;
- w = addend;
- err += rem;
- while (err >= addend)
- {
- w++;
- err -= addend;
- }
- r.x1 = r.x2;
- r.x2 = r.x1 + w;
- }
- if (normal)
- {
- double x, y, variance, mean, step, factor;
-
- variance = cur_var->res[FRQ_ST_VARIANCE];
- mean = cur_var->res[FRQ_ST_MEAN];
- factor = (1. / (sqrt (2. * PI * variance))
- * cur_var->p.frq.t.valid_cases * interval);
- graf_polyline_begin ();
- for (x = lower, step = (upper - lower) / (POLYLINE_DENSITY);
- x <= upper; x += step)
- {
- y = factor * exp (-square (x - mean) / (2. * variance));
- debug_printf (("(%20.10f, %20.10f)\n", x, y));
- graf_polyline_point (gb.x1 + (x - lower) / (upper - lower) * width (gb),
- gb.y2 - y * (height (gb) - 1) / top);
- }
- graf_polyline_end ();
- }
- graf_fill_color (0, 0, 0);
-}
-
-static int
-scale_dep_axis (int max)
-{
- int j, s, x, y, ty, by;
- char buf[10];
-
- x = 10, s = 2;
- if (scale != SYSMIS && max < scale)
- x = scale, s = scale / 5;
- else if (format == PERCENT)
- {
- max = ((BIG_TYPE) 100 * cur_var->p.frq.t.max_freq
- / cur_var->p.frq.t.valid_cases + 1);
- if (max < 5)
- x = 5, s = 1;
- else if (max < 10)
- x = 10, s = 2;
- else if (max < 25)
- x = 25, s = 5;
- else if (max < 50)
- x = 50, s = 10;
- else
- max = 100, s = 20;
- }
- else /* format==FREQ */
- /* Uses a progression of 10, 20, 50, 100, 200, 500, ... */
- for (;;)
- {
- if (x > max)
- break;
- x *= 2;
- s *= 2;
- if (x > max)
- break;
- x = x / 2 * 5;
- s = s / 2 * 5;
- if (x > max)
- break;
- x *= 2;
- s *= 2;
- }
- graf_font_size (iy / 9); /* 8-pt text */
- for (j = 0; j <= x; j += s)
- {
- y = gb.y2 - (BIG_TYPE) j *(height (gb) - 1) / x;
- ty = y - iy / 64;
- by = y + iy / 64;
- if (ty < gb.y1)
- ty += iy / 64, by += iy / 64;
- else if (by > gb.y2)
- ty -= iy / 64, by -= iy / 64;
- graf_fill_rect (gb.x1 - ix / 16, ty, gb.x1, by);
- sprintf (buf, "%d", j);
- graf_text (gb.x1 - ix / 8, (ty + by) / 2, buf, CRJUST);
- }
- return x;
-}
-\f
-/* Percentiles. */
-
-static void ungrouped_pcnt (int i);
-static int grouped_interval_pcnt (int i);
-static void out_pcnt (double, double);
-
-static void
-out_percentiles (int i)
-{
- if (cur_var->type == ALPHA || !n_percentiles)
- return;
-
- outs_line (_("Percentile Value "
- "Percentile Value "
- "Percentile Value"));
- blank_line ();
-
- column = 0;
- if (!g_var[i])
- ungrouped_pcnt (i);
- else if (g_var[i] == 1)
- grouped_interval_pcnt (i);
-#if 0
- else if (g_var[i] == -1)
- grouped_pcnt (i);
- else
- grouped_boundaries_pcnt (i);
-#else /* !0 */
- else
- warn (_("this form of percentiles not supported"));
-#endif
- if (column)
- out_eol ();
-}
-
-static void
-out_pcnt (double pcnt, double value)
-{
- if (!column)
- out_header ();
- else
- outs (" ");
- out ("%7.2f%13.3f", pcnt * 100., value);
- column++;
- if (column == 3)
- {
- out_eol ();
- column = 0;
- }
-}
-
-static void
-ungrouped_pcnt (int i)
-{
- AVLtraverser *t = NULL;
- struct freq *f;
- double *p, *e;
- int sum;
-
- p = percentiles;
- e = &percentiles[n_percentiles];
- sum = 0;
- for (f = avltrav (cur_var->p.frq.t.f, &t);
- f && p < e; f = avltrav (cur_var->p.frq.t.f, &t))
- {
- sum += f->c;
- while (sum >= p[0] * cur_var->p.frq.t.valid_cases && p < e)
- out_pcnt (*p++, f->v.f);
- }
-}
-
-
-static int
-grouped_interval_pcnt (int i)
-{
- AVLtraverser * t = NULL;
- struct freq * f, *fp;
- double *p, *e, w;
- int sum, psum;
-
- p = percentiles;
- e = &percentiles[n_percentiles];
- w = gl_var[i][0];
- sum = psum = 0;
- for (fp = 0, f = avltrav (cur_var->p.frq.t.f, &t);
- f && p < e;
- fp = f, f = avltrav (cur_var->p.frq.t.f, &t))
- {
- if (fp)
- if (fabs (f->v.f - fp->v.f) < w)
- {
- out_eol ();
- column = 0;
- return msg (SE, _("Difference between %g and %g is "
- "too small for grouping interval %g."), f->v.f,
- fp->v.f, w);
- }
- psum = sum;
- sum += f->c;
- while (sum >= p[0] * cur_var->p.frq.t.valid_cases && p < e)
- {
- out_pcnt (p[0], (((p[0] * cur_var->p.frq.t.valid_cases) - psum) * w / f->c
- + (f->v.f - w / 2)));
- p++;
- }
- }
- return 1;
-}
-#endif
-
/*
Local Variables:
mode: c
#include <assert.h>
#include <stdlib.h>
#include "alloc.h"
-#include "avl.h"
#include "command.h"
#include "error.h"
#include "file-handle.h"
+#include "hash.h"
#include "lexer.h"
#include "misc.h"
#include "pfm.h"
#include "settings.h"
#include "sfm.h"
#include "str.h"
+#include "value-labels.h"
#include "var.h"
#include "vfm.h"
#include "vfmP.h"
}
for (i = 0; i < nv; i++)
- avl_force_delete (dict->var_by_name, v[i]);
+ hsh_force_delete (dict->name_tab, v[i]);
for (i = 0; i < nv; i++)
{
strcpy (v[i]->name, new_names[i]);
- if (NULL != avl_insert (dict->var_by_name, v[i]))
+ if (NULL != hsh_insert (dict->name_tab, v[i]))
{
msg (SE, _("Duplicate variables name %s."), v[i]->name);
goto lossage;
assert (!mv || mv->type == ALPHA || mv->width == 0);
if (mv && dv->width == mv->width)
{
- if (dv->val_lab && !mv->val_lab)
- mv->val_lab = copy_value_labels (dv->val_lab);
+ if (val_labs_count (dv->val_labs)
+ && !val_labs_count (mv->val_labs))
+ mv->val_labs = val_labs_copy (dv->val_labs);
if (dv->miss_type != MISSING_NONE && mv->miss_type == MISSING_NONE)
copy_missing_values (mv, dv);
}
#endif
#include "alloc.h"
-#include "avl.h"
#include "command.h"
#include "do-ifP.h"
#include "error.h"
#include "expr.h"
#include "filename.h"
#include "getline.h"
+#include "hash.h"
#include "julcal/julcal.h"
#include "lexer.h"
#include "magic.h"
#endif
/* var.h */
- default_dict.var_by_name = avl_create (NULL, cmp_variable, NULL);
+ default_dict.name_tab = hsh_create (8, compare_variables, hash_variable,
+ NULL, NULL);
vec_init (&reinit_sysmis);
vec_init (&reinit_blanks);
sprintf (curdate, "%2d %s %04d", dy, gettext (months[mn]), yr);
}
-int
-cmp_variable (const void *a, const void *b, void *foo unused)
-{
- return strcmp (((struct variable *) a)->name, ((struct variable *) b)->name);
-}
-
#if __BORLANDC__
int
_RTLENTRY _EXPFUNC _matherr (struct exception _FAR *__e)
#include <assert.h>
#include <limits.h>
#include <stdlib.h>
+#include "algorithm.h"
#include "alloc.h"
#include "hash.h"
-#include "quicksort.h"
+#include "misc.h"
#include "str.h"
/* Hash table. */
size_t size; /* Number of entries (a power of 2). */
void **entries; /* Hash table proper. */
- void *param;
+ void *aux; /* Auxiliary data for comparison functions. */
hsh_compare_func *compare;
hsh_hash_func *hash;
hsh_free_func *free;
{
const unsigned char *buf = buf_;
unsigned hash = 0;
+
+ assert (buf != NULL);
while (size-- > 0)
{
hash += *buf++;
{
const unsigned char *s = s_;
unsigned hash = 0;
+
+ assert (s != NULL);
while (*s != '\0')
{
hash += *s++;
{
return hsh_hash_bytes (&i, sizeof i);
}
+
+/* Returns a hash value for D, computed by hashing the bytes of
+   D itself.  Any NaN hashes to 0. */
+unsigned
+hsh_hash_double (double d)
+{
+  if (isnan (d))
+    return 0;
+  return hsh_hash_bytes (&d, sizeof d);
+}
\f
/* Hash tables. */
for an entry; FREE destroys an entry. */
struct hsh_table *
hsh_create (int size, hsh_compare_func *compare, hsh_hash_func *hash,
- hsh_free_func *free, void *param)
+ hsh_free_func *free, void *aux)
{
- struct hsh_table *h = xmalloc (sizeof *h);
+ struct hsh_table *h;
int i;
+ assert (size > 0);
+ assert (compare != NULL);
+ assert (hash != NULL);
+
+ h = xmalloc (sizeof *h);
h->used = 0;
+ if (size < 4)
+ size = 4;
h->size = next_power_of_2 (size);
h->entries = xmalloc (sizeof *h->entries * h->size);
for (i = 0; i < h->size; i++)
h->entries[i] = NULL;
- h->param = param;
+ h->aux = aux;
h->compare = compare;
h->hash = hash;
h->free = free;
{
int i;
+ assert (h != NULL);
if (h->free)
for (i = 0; i < h->size; i++)
if (h->entries[i] != NULL)
- h->free (h->entries[i], h->param);
+ h->free (h->entries[i], h->aux);
for (i = 0; i < h->size; i++)
h->entries[i] = NULL;
{
int i;
- if (h == NULL)
- return;
- if (h->free)
- for (i = 0; i < h->size; i++)
- if (h->entries[i] != NULL)
- h->free (h->entries[i], h->param);
- free (h->entries);
- free (h);
+ if (h != NULL)
+ {
+ if (h->free)
+ for (i = 0; i < h->size; i++)
+ if (h->entries[i] != NULL)
+ h->free (h->entries[i], h->aux);
+ free (h->entries);
+ free (h);
+ }
}
/* Changes the capacity of H to NEW_SIZE. */
static void
hsh_rehash (struct hsh_table *h, size_t new_size)
{
- void **begin = h->entries;
- void **end = &h->entries[h->size];
- void **table_p;
+ void **begin, **end, **table_p;
int i;
+ assert (h != NULL);
+ assert (new_size >= h->used);
+
+ begin = h->entries;
+ end = begin + h->size;
+
h->size = new_size;
h->entries = xmalloc (sizeof *h->entries * h->size);
for (i = 0; i < h->size; i++)
if (*table_p == NULL)
continue;
- entry = &h->entries[h->hash (*table_p, h->param) & (h->size - 1)];
+ entry = &h->entries[h->hash (*table_p, h->aux) & (h->size - 1)];
while (*entry)
if (--entry < h->entries)
entry = &h->entries[h->size - 1];
free (begin);
}
-/* hsh_sort() helper function that ensures NULLs are sorted after the
- rest of the table. */
+/* An "algo_predicate_func" that returns nonzero if DATA points
+ to a non-null pointer. */
static int
-sort_nulls_last (const void *a_, const void *b_, void *h_)
+not_null (const void *data_, void *aux unused)
+{
+ void *const *data = data_;
+
+ return *data != NULL;
+}
+
+/* Compacts hash table H in place and returns a pointer to its
+   entry array.  The returned data consists of hsh_count(H)
+   non-null pointers, in no particular order, followed by a null
+   pointer.  After calling this function, only hsh_destroy() and
+   hsh_count() may be applied to H. */
+void **
+hsh_data (struct hsh_table *h)
+{
+  size_t n_filled;
+
+  assert (h != NULL);
+  n_filled = partition (h->entries, h->size, sizeof *h->entries,
+                        not_null, NULL);
+  assert (n_filled == h->used);
+  return h->entries;
+}
+
+/* sort() comparison callback.  A_ and B_ each point to an entry
+   (a void *) in a hash table's entry array; dereferences them
+   and passes the entries to the comparison function of hash
+   table H_, along with that table's auxiliary data.
+   File-local: hash.h does not declare this helper, and its
+   generic name could otherwise clash at link time with a
+   same-named function in another module. */
+static int
+comparison_helper (const void *a_, const void *b_, void *h_)
{
- void *a = *(void **) a_;
- void *b = *(void **) b_;
+  void *const *a = a_;
+  void *const *b = b_;
struct hsh_table *h = h_;
- if (a != NULL)
- {
- if (b != NULL)
- return h->compare (a, b, h->param);
- else
- return -1;
- }
- else
- {
- if (b != NULL)
- return +1;
- else
- return 0;
- }
+  return h->compare (*a, *b, h->aux);
}
-/* Sorts hash table H based on hash comparison function. NULLs
- are sent to the end of the table. The resultant table is
- returned (it is guaranteed to be NULL-terminated). H should
- not be used again as a hash table until and unless hsh_clear()
- called. */
+/* Sorts hash table H based on hash comparison function. The
+ returned data consists of hsh_count(H) non-null pointers,
+ sorted in order of the hash comparison function, followed by a
+ null pointer. After calling this function, only hsh_destroy()
+ and hsh_count() may be applied to H. */
void **
hsh_sort (struct hsh_table *h)
{
- quicksort (h->entries, h->size, sizeof *h->entries, sort_nulls_last, h);
+ assert (h != NULL);
+
+ hsh_data (h);
+ sort (h->entries, h->used, sizeof *h->entries, comparison_helper, h);
return h->entries;
}
+
+/* Returns a newly allocated array holding the pointers to the
+   data in H: hsh_count(H) non-null pointers, in no particular
+   order, followed by a null pointer.  H itself is not modified.
+   The caller is responsible for freeing the returned array. */
+void **
+hsh_data_copy (struct hsh_table *h)
+{
+  void **entries;
+
+  assert (h != NULL);
+  entries = xmalloc ((h->used + 1) * sizeof *entries);
+  copy_if (h->entries, h->size, sizeof *h->entries, entries,
+           not_null, NULL);
+  entries[h->used] = NULL;
+  return entries;
+}
+
+/* Returns a newly allocated, sorted array holding the pointers
+   to the data in H: hsh_count(H) non-null pointers, ordered by
+   the hash comparison function, followed by a null pointer.  H
+   itself is not modified.  The caller is responsible for freeing
+   the returned array. */
+void **
+hsh_sort_copy (struct hsh_table *h)
+{
+  void **sorted;
+
+  assert (h != NULL);
+  sorted = hsh_data_copy (h);
+  sort (sorted, h->used, sizeof *sorted, comparison_helper, h);
+  return sorted;
+}
\f
/* Hash entries. */
-/* Searches hash table H for TARGET. If found, returns a pointer to a
- pointer to that entry; otherwise returns a pointer to a NULL entry
- which _must_ be used to insert a new entry having the same key
- data. */
+/* Searches hash table H for TARGET. If found, returns a pointer
+ to a pointer to that entry; otherwise returns a pointer to a
+ NULL entry which *must* be used to insert a new entry having
+ the same key data. */
inline void **
hsh_probe (struct hsh_table *h, const void *target)
{
void **entry;
+ assert (h != NULL);
+ assert (target != NULL);
+
/* Order of these statements is important! */
if (h->used > h->size / 2)
hsh_rehash (h, h->size * 2);
- entry = &h->entries[h->hash (target, h->param) & (h->size - 1)];
+ entry = &h->entries[h->hash (target, h->aux) & (h->size - 1)];
while (*entry)
{
- if (!h->compare (*entry, target, h->param))
+ if (!h->compare (*entry, target, h->aux))
return entry;
if (--entry < h->entries)
entry = &h->entries[h->size - 1];
return entry;
}
+/* Looks up TARGET in hash table H.  When no matching entry
+   exists, stores TARGET in the table and returns a null
+   pointer.  When a match exists, leaves the table unchanged and
+   returns the existing entry. */
+void *
+hsh_insert (struct hsh_table *h, void *target)
+{
+  void **slot;
+
+  assert (h != NULL);
+  assert (target != NULL);
+
+  slot = hsh_probe (h, target);
+  if (*slot != NULL)
+    return *slot;
+
+  *slot = target;
+  return NULL;
+}
+
+/* Looks up TARGET in hash table H and stores TARGET in the slot
+   that hsh_probe() yields for it.  Returns the entry previously
+   held there: a null pointer if TARGET was not in the table,
+   otherwise the match that TARGET displaced.  The displaced
+   entry is handed back as-is; this function does not pass it to
+   the table's free function. */
+void *
+hsh_replace (struct hsh_table *h, void *target)
+{
+  void **slot = hsh_probe (h, target);
+  void *displaced = *slot;
+
+  *slot = target;
+  return displaced;
+}
+
/* Locates an entry matching TARGET. Returns a pointer to the
entry, or a null pointer on failure. */
static inline void **
locate_matching_entry (struct hsh_table *h, const void *target)
{
- void **entry = &h->entries[h->hash (target, h->param) & (h->size - 1)];
+ void **entry = &h->entries[h->hash (target, h->aux) & (h->size - 1)];
while (*entry)
{
- if (!h->compare (*entry, target, h->param))
+ if (!h->compare (*entry, target, h->aux))
return entry;
if (--entry < h->entries)
entry = &h->entries[h->size - 1];
hsh_delete (struct hsh_table *h, const void *target)
{
void **entry = locate_matching_entry (h, target);
- if (h->free != NULL)
+ if (entry != NULL)
{
- h->free (*entry, h->param);
+ if (h->free != NULL)
+ h->free (*entry, h->aux);
*entry = 0;
hsh_rehash (h, h->size);
return 1;
#include <stddef.h>
-typedef int hsh_compare_func (const void *, const void *, void *param);
-typedef unsigned hsh_hash_func (const void *, void *param);
-typedef void hsh_free_func (void *, void *param);
+typedef int hsh_compare_func (const void *, const void *, void *aux);
+typedef unsigned hsh_hash_func (const void *, void *aux);
+typedef void hsh_free_func (void *, void *aux);
/* Hash table iterator (opaque). */
struct hsh_iterator
size_t next; /* Index of next entry. */
};
-/* Prime numbers and hash functions. */
+/* Hash functions. */
unsigned hsh_hash_bytes (const void *, size_t);
unsigned hsh_hash_string (const char *);
unsigned hsh_hash_int (int);
+unsigned hsh_hash_double (double);
/* Hash tables. */
struct hsh_table *hsh_create (int m, hsh_compare_func *,
hsh_hash_func *, hsh_free_func *,
- void *param);
+ void *aux);
void hsh_clear (struct hsh_table *);
void hsh_destroy (struct hsh_table *);
void **hsh_sort (struct hsh_table *);
+void **hsh_data (struct hsh_table *);
+void **hsh_sort_copy (struct hsh_table *);
+void **hsh_data_copy (struct hsh_table *);
/* Search and insertion. */
void **hsh_probe (struct hsh_table *, const void *);
+void *hsh_insert (struct hsh_table *, void *);
+void *hsh_replace (struct hsh_table *, void *);
void *hsh_find (struct hsh_table *, const void *);
int hsh_delete (struct hsh_table *, const void *);
#include <stdio.h>
#include <assert.h>
#include "alloc.h"
-#include "avl.h"
#include "command.h"
+#include "hash.h"
#include "lexer.h"
#include "error.h"
#include "magic.h"
{
int i;
- temp_dict.var_by_name = avl_create (NULL, cmp_variable, NULL);
+ temp_dict.name_tab = hsh_create (8, compare_variables, hash_variable,
+ NULL, NULL);
for (i = 0; i < temp_dict.nvar; i++)
- avl_force_insert (temp_dict.var_by_name, temp_dict.var[i]);
+ hsh_force_insert (temp_dict.name_tab, temp_dict.var[i]);
}
}
else
}
if (cmd->sbc_variables)
- avl_destroy (temp_dict.var_by_name, NULL);
+ hsh_destroy (temp_dict.name_tab);
return 1;
}
#include <stdlib.h>
#include <assert.h>
+#include "algorithm.h"
#include "alloc.h"
-#include "avl.h"
#include "bitvector.h"
#include "command.h"
#include "error.h"
+#include "hash.h"
#include "lexer.h"
#include "misc.h"
#include "str.h"
#include "vfm.h"
/* FIXME: should change weighting variable, etc. */
-/* These control the way that compare_variables() does its work. */
-static int forward; /* 1=FORWARD, 0=BACKWARD. */
-static int positional; /* 1=POSITIONAL, 0=ALPHA. */
+/* These control the ordering produced by
+ compare_variables_given_ordering(). */
+struct ordering
+ {
+ int forward; /* 1=FORWARD, 0=BACKWARD. */
+ int positional; /* 1=POSITIONAL, 0=ALPHA. */
+ };
-static int compare_variables (const void *pa, const void *pb);
+static int compare_variables_given_ordering (const void *, const void *,
+ void *ordering);
/* Explains how to modify the variables in a dictionary in conjunction
with the p.mfv field of `variable'. */
lex_match ('=');
do
{
+ struct ordering ordering;
int prev_nv = nv;
- forward = positional = 1;
+ ordering.forward = ordering.positional = 1;
if (lex_match_id ("FORWARD"));
else if (lex_match_id ("BACKWARD"))
- forward = 0;
+ ordering.forward = 0;
if (lex_match_id ("POSITIONAL"));
else if (lex_match_id ("ALPHA"))
- positional = 0;
+ ordering.positional = 0;
if (lex_match (T_ALL) || token == '/' || token == '.')
{
goto lossage;
}
}
- qsort (&v[prev_nv], nv - prev_nv, sizeof *v, compare_variables);
+ sort (&v[prev_nv], nv - prev_nv, sizeof *v,
+ compare_variables_given_ordering, &ordering);
}
while (token != '/' && token != '.');
}
else if (lex_match_id ("KEEP"))
{
+ struct ordering ordering;
struct variable **keep_vars;
int nv;
int counter;
/* Transform the list of variables to keep into a list of
variables to drop. First sort the keep list, then figure
out which variables are missing. */
- forward = positional = 1;
- qsort (keep_vars, nv, sizeof *keep_vars, compare_variables);
+ ordering.forward = ordering.positional = 1;
+ sort (keep_vars, nv, sizeof *keep_vars,
+ compare_variables_given_ordering, &ordering);
vm.n_drop = default_dict.nvar - nv;
}
}
-/* Compares a pair of variables according to the settings in `forward'
- and `positional', returning a strcmp()-type result. */
+/* Compares A and B according to the settings in
+ ORDERING, returning a strcmp()-type result. */
static int
-compare_variables (const void *pa, const void *pb)
+compare_variables_given_ordering (const void *a_, const void *b_,
+ void *ordering_)
{
- const struct variable *a = *(const struct variable **) pa;
- const struct variable *b = *(const struct variable **) pb;
-
- int result = positional ? a->index - b->index : strcmp (a->name, b->name);
- return forward ? result : -result;
+ struct variable *const *pa = a_;
+ struct variable *const *pb = b_;
+ const struct variable *a = *pa;
+ const struct variable *b = *pb;
+ const struct ordering *ordering = ordering_;
+
+ int result;
+ if (ordering->positional)
+ result = a->index < b->index ? -1 : a->index > b->index;
+ else
+ result = strcmp (a->name, b->name);
+ if (!ordering->forward)
+ result = -result;
+ return result;
}
/* (Possibly) rearranges variables and (possibly) removes some
/* Check for duplicate variable names if appropriate. */
if (permanent && vm->n_rename)
{
+ struct ordering ordering;
struct variable **v;
if (vm->reorder_list)
else
v = xmalloc (sizeof *v * n->nvar);
memcpy (v, n->var, sizeof *v * n->nvar);
- forward = 1, positional = 0;
- qsort (v, n->nvar, sizeof *v, compare_variables);
+ ordering.forward = 1;
+ ordering.positional = 0;
+ sort (v, n->nvar, sizeof *v,
+ compare_variables_given_ordering, &ordering);
for (i = 1; i < n->nvar; i++)
if (!strcmp (n->var[i]->name, n->var[i - 1]->name))
{
for (i = 0; i < n->nvar; i++)
if (n->var[i]->p.mfv.new_name[0])
{
- avl_force_delete (n->var_by_name, n->var[i]);
+ hsh_force_delete (n->name_tab, n->var[i]);
if (head)
tail = tail->p.mfv.next = n->var[i];
else
for (; head; head = head->p.mfv.next)
{
strcpy (head->name, head->p.mfv.new_name);
- avl_force_insert (n->var_by_name, head);
+ hsh_force_insert (n->name_tab, head);
}
free (save_var);
#include <errno.h>
#include <math.h>
#include "alloc.h"
-#include "avl.h"
#include "file-handle.h"
#include "format.h"
#include "getline.h"
+#include "hash.h"
#include "magic.h"
#include "misc.h"
#include "pfm.h"
#include "str.h"
+#include "value-labels.h"
#include "var.h"
#include "debug-print.h"
{
union value val;
char *label;
- struct value_label *vl;
int j;
goto lossage;
asciify (label);
- /* Create a label. */
- vl = xmalloc (sizeof *vl);
- vl->v = val;
- vl->s = xstrdup (label);
- vl->ref_count = nv;
-
/* Assign the value_label's to each variable. */
for (j = 0; j < nv; j++)
{
struct variable *var = v[j];
- struct value_label *old;
-
- /* Create AVL tree if necessary. */
- if (!var->val_lab)
- var->val_lab = avl_create (NULL, val_lab_cmp,
- (void *) (var->width));
- old = avl_replace (var->val_lab, vl);
- if (old == NULL)
+ if (!val_labs_replace (var->val_labs, val, label))
continue;
if (var->type == NUMERIC)
lose ((h, _("Duplicate label for value %g for variable %s."),
- vl->v.f, var->name));
+ val.f, var->name));
else
lose ((h, _("Duplicate label for value `%.*s' for variable %s."),
- var->width, vl->v.s, var->name));
-
- free_value_label (old);
+ var->width, val.s, var->name));
}
}
free (v);
#include <stdlib.h>
#include <time.h>
#include "alloc.h"
-#include "avl.h"
#include "error.h"
#include "file-handle.h"
#include "gmp.h"
+#include "hash.h"
#include "magic.h"
#include "pfm.h"
#include "str.h"
+#include "value-labels.h"
#include "var.h"
#include "version.h"
for (i = 0; i < dict->nvar; i++)
{
- avl_traverser iter;
+ struct val_labs_iterator *j;
struct variable *v = dict->var[i];
- struct value_label *vl;
+ struct val_lab *vl;
- if (v->val_lab == NULL)
+ if (!val_labs_count (v->val_labs))
continue;
if (!bufwrite (h, "D", 1)
|| !write_int (h, 1)
|| !write_string (h, v->name)
- || !write_int (h, avl_count (v->val_lab)))
+ || !write_int (h, val_labs_count (v->val_labs)))
return 0;
- avl_traverser_init (iter);
- while (NULL != (vl = avl_traverse (v->val_lab, &iter)))
- if (!write_value (h, &vl->v, v)
- || !write_string (h, vl->s))
- return 0;
+ for (vl = val_labs_first_sorted (v->val_labs, &j); vl != NULL;
+ vl = val_labs_next (v->val_labs, &j))
+ if (!write_value (h, &vl->value, v)
+ || !write_string (h, vl->label))
+ {
+ val_labs_done (&j);
+ return 0;
+ }
}
return 1;
add_encoding (struct outp_driver *this, char *filename)
{
struct ps_driver_ext *x = this->ext;
-
struct ps_encoding **pe;
filename = find_encoding_file (this, filename);
if (!filename)
return;
- pe = (struct ps_encoding **) hsh_probe (x->encodings, (void *) &filename);
+ pe = (struct ps_encoding **) hsh_probe (x->encodings, &filename);
if (*pe)
{
free (filename);
if (ext->lines[type] == NULL)
ext->lines[type] = hsh_create (31, compare_line, hash_line,
free_line, NULL);
- f = (struct line_form **) hsh_probe (ext->lines[type],
- (struct line_form *) & ind);
+ f = (struct line_form **) hsh_probe (ext->lines[type], &ind);
if (*f == NULL)
{
*f = xmalloc (sizeof **f + sizeof (int[15][2]));
02111-1307, USA. */
#include <config.h>
+#include <assert.h>
+#include <inttypes.h>
+#include <limits.h>
#include <math.h>
#include <stdlib.h>
#include <time.h>
+#include "alloc.h"
#include "magic.h"
#include "random.h"
#include "settings.h"
-/* Deal with broken system random number generator. */
-#if HAVE_GOOD_RANDOM
-#define real_rand rand
-#define real_srand srand
-#define REAL_RAND_MAX RAND_MAX
-#else /* !HAVE_GOOD_RANDOM */
-#define REAL_RAND_MAX 32767
-
-/* Some systems are so broken that they do not supply a value for
- RAND_MAX. There is absolutely no reliable way to determine this
- value, either. So we must supply our own. This one is the one
- presented in the ANSI C standard as strictly compliant. */
-static unsigned long int next = 1;
+/* Random number generator. */
+struct rng
+ {
+ /* RC4-based random bytes. */
+ uint8_t s[256];
+ uint8_t i, j;
+
+ /* Normal distribution. */
+ double next_normal;
+ };
+
+/* Allocates a new random number generator, seeds it, and
+   returns it.  The seed is a time value kept across calls: it is
+   read from the clock while it is still zero (as on the first
+   call) and incremented by one on every other call. */
+struct rng *
+rng_create (void)
+{
+  static time_t seed_time;
+  struct rng *new_rng = xmalloc (sizeof *new_rng);
+
+  if (seed_time != 0)
+    seed_time++;
+  else
+    time (&seed_time);
+
+  rng_seed (new_rng, &seed_time, sizeof seed_time);
+  new_rng->next_normal = NOT_DOUBLE;
+  return new_rng;
+}
+
+/* Destroys RNG. */
+void
+rng_destroy (struct rng *rng)
+{
+ free (rng);
+}
-int
-real_rand (void)
+/* Swap bytes. */
+static inline void
+swap_byte (uint8_t *a, uint8_t *b)
{
- next = next * 1103515245 + 12345;
- return (unsigned int)(next / 65536) % 32768;
+ uint8_t t = *a;
+ *a = *b;
+ *b = t;
}
+/* Seeds RNG based on the SIZE bytes at KEY_.  SIZE must be
+   positive.  A key shorter than 256 bytes is cycled through
+   repeatedly; at most the first 256 bytes of a longer key are
+   used. */
void
-real_srand (unsigned int seed)
+rng_seed (struct rng *rng, const void *key_, size_t size)
{
- next = seed;
+  const uint8_t *key = key_;
+  size_t key_idx;
+  uint8_t *s;
+  int i, j;
+
+  assert (rng != NULL);
+  assert (key != NULL);
+  assert (size > 0);
+
+  s = rng->s;
+  rng->i = rng->j = 0;
+  for (i = 0; i < 256; i++)
+    s[i] = i;
+
+  /* J accumulates across the whole shuffle, so it must start
+     from a known value; reading it uninitialized is undefined
+     and would make the state differ between runs even for
+     identical keys. */
+  j = 0;
+  for (key_idx = 0, i = 0; i < 256; i++)
+    {
+      j = (j + s[i] + key[key_idx]) & 255;
+      swap_byte (s + i, s + j);
+      if (++key_idx >= size)
+        key_idx = 0;
+    }
}
-#endif /* !HAVE_GOOD_RANDOM */
-
-/* The random number generator here is an implementation in C of
- Knuth's Algorithm 3.2.2B (Randomizing by Shuffling) in _The Art of
- Computer Programming_, Vol. 2. */
-#define k 13
-static int V[k];
-static int Y;
-
-static double X2;
-
-/* Initializes the random number generator. Should be called once by
- every cmd_*() that uses random numbers. Note that this includes
- all procedures that use expressions since they may generate random
- numbers. */
+/* Reads SIZE random bytes from RNG into BUF_. The generator's
+ I and J indexes are advanced once per byte produced and
+ written back to RNG at the end. */
void
-setup_randomize (void)
+rng_get_bytes (struct rng *rng, void *buf_, size_t size)
{
- static time_t curtime;
- int i;
+ uint8_t *buf = buf_;
+ uint8_t *s;
+ uint8_t i, j;
+
+ assert (rng != 0);
- if (set_seed == NOT_LONG)
+ s = rng->s;
+ i = rng->i;
+ j = rng->j;
+ while (size-- > 0)
{
- if (!curtime)
- time (&curtime);
- real_srand (curtime++);
+ /* I and J are uint8_t, so these additions wrap modulo 256. */
+ i += 1;
+ j += s[i];
+ swap_byte (s + i, s + j);
+ *buf++ = s[(s[i] + s[j]) & 255];
}
- else
- real_srand (set_seed);
+ rng->i = i;
+ rng->j = j;
+}
- set_seed_used = 1;
+/* Returns a random int uniformly distributed over the range
+   [0, INT_MAX]. */
+int
+rng_get_int (struct rng *rng)
+{
+  unsigned bits;
- for (i = 0; i < k; i++)
- V[i] = real_rand ();
- Y = real_rand ();
- X2 = NOT_DOUBLE;
+
+  /* Draw the bits as unsigned and mask off everything above
+     INT_MAX.  Taking abs() of a random int instead is undefined
+     for INT_MIN and returns 0 only half as often as any other
+     value. */
+  rng_get_bytes (rng, &bits, sizeof bits);
+  return (int) (bits & INT_MAX);
}
-/* Standard shuffling procedure for increasing randomness of the ANSI
- C random number generator. Returns a random number R where 0 <= R
- <= RAND_MAX. */
-inline int
-shuffle (void)
+/* Returns a random unsigned in the range [0, UINT_MAX]. */
+unsigned
+rng_get_unsigned (struct rng *rng)
{
- int j = k * Y / RAND_MAX;
- Y = V[j];
- V[j] = real_rand ();
- return Y;
+ unsigned value;
+
+ rng_get_bytes (rng, &value, sizeof value);
+ return value;
}
-/* Returns a random number R where 0 <= R <= X. */
-double
-rand_uniform (double x)
+/* Returns a random number from the uniform distribution with
+   range [0,1). */
+double
+rng_get_double (struct rng *rng)
{
- return ((double) shuffle ()) / (((double) RAND_MAX) / x);
+  for (;;)
+    {
+      unsigned long bits;
+      double value;
+
+      rng_get_bytes (rng, &bits, sizeof bits);
+
+      /* Divide in floating point: BITS / ULONG_MAX is integer
+         division and yields 0 for every input except ULONG_MAX.
+         The divisor is ULONG_MAX + 1 so that the quotient stays
+         below 1. */
+      value = bits / (ULONG_MAX + 1.0);
+
+      /* Where unsigned long is wider than a double's mantissa,
+         the largest inputs can still round up to exactly 1;
+         draw again in that rare case. */
+      if (value < 1.0)
+        return value;
+    }
}
/* Returns a random number from the distribution with mean 0 and
- standard deviation X. This uses algorithm P in section 3.4.1C of
- Knuth's _Art of Computer Programming_, Vol 2. */
+ standard deviation 1. (Multiply the result by the desired
+ standard deviation, then add the desired mean.) */
double
-rand_normal (double x)
+rng_get_double_normal (struct rng *rng)
{
- double U1, U2;
- double V1, V2;
- double S;
- double X1;
-
- if (X2 != NOT_DOUBLE)
+ /* Knuth, _The Art of Computer Programming_, Vol. 2, 3.4.1C,
+ Algorithm P. */
+ double this_normal;
+
+ if (rng->next_normal != NOT_DOUBLE)
{
- double t = X2;
- X2 = NOT_DOUBLE;
- return t * x;
+ this_normal = rng->next_normal;
+ rng->next_normal = NOT_DOUBLE;
}
- do
+ else
{
- U1 = ((double) shuffle ()) / RAND_MAX;
- U2 = ((double) shuffle ()) / RAND_MAX;
- V1 = 2 * U1 - 1;
- V2 = 2 * U2 - 1;
- S = V1 * V1 + V2 * V2;
+ double v1, v2, s;
+
+ do
+ {
+ double u1 = rng_get_double (rng);
+ double u2 = rng_get_double (rng);
+ v1 = 2.0 * u1 - 1.0;
+ v2 = 2.0 * u2 - 1.0;
+ s = v1 * v1 + v2 * v2;
+ }
+ while (s >= 1);
+
+ this_normal = v1 * sqrt (-2. * log (s) / s);
+ rng->next_normal = v2 * sqrt (-2. * log (s) / s);
}
- while (S >= 1);
- X1 = V1 * sqrt (-2. * log (S) / S);
- X2 = V2 * sqrt (-2. * log (S) / S);
- return X1 * x;
+
+ return this_normal;
}
-/* Returns a random integer R, where 0 <= R < X. */
-int
-rand_simple (int x)
+/* Gets an initialized RNG for use in PSPP transformations and
+ procedures. */
+struct rng *
+pspp_rng (void)
{
- return shuffle () % x;
-}
+ static struct rng *rng;
+ if (rng == NULL)
+ rng = rng_create ();
+ return rng;
+}
#if !random_h
#define random_h 1
-void setup_randomize (void);
-double rand_uniform (double x);
-double rand_normal (double x);
-int rand_simple (int x);
+struct rng *rng_create (void);
+void rng_destroy (struct rng *);
+void rng_seed (struct rng *, const void *, size_t);
+void rng_get_bytes (struct rng *, void *, size_t);
+int rng_get_int (struct rng *);
+unsigned rng_get_unsigned (struct rng *);
+double rng_get_double (struct rng *);
+double rng_get_double_normal (struct rng *);
+
+struct rng *pspp_rng (void);
#endif /* random.h */
#include <stdlib.h>
#include <assert.h>
#include "alloc.h"
-#include "avl.h"
#include "command.h"
#include "error.h"
+#include "hash.h"
#include "lexer.h"
#include "str.h"
#include "var.h"
/* Finally, do the renaming. */
for (iter = head; iter; iter = iter->p.mfv.next)
- avl_force_delete (default_dict.var_by_name, iter);
+ hsh_force_delete (default_dict.name_tab, iter);
for (iter = head; iter; iter = iter->p.mfv.next)
{
strcpy (iter->name, iter->p.mfv.new_name);
- avl_force_insert (default_dict.var_by_name, iter);
+ hsh_force_insert (default_dict.name_tab, iter);
}
return CMD_SUCCESS;
02111-1307, USA. */
#include <config.h>
+#include <limits.h>
#include <stdio.h>
#include <math.h>
#include "alloc.h"
struct trns_header h;
int type; /* One of TYPE_*. */
int n, N; /* TYPE_A_FROM_B: n from N. */
- int m, t; /* TYPE_A_FROM_B: # selected so far; # so far. */
- int frac; /* TYPE_FRACTION: a fraction out of 65536. */
+ int m, t; /* TYPE_A_FROM_B: # picked so far; # so far. */
+ unsigned frac; /* TYPE_FRACTION: a fraction of UINT_MAX. */
};
int sample_trns_proc (struct trns_header *, struct ccase *);
int type;
int a, b;
- int frac;
+ unsigned frac;
lex_match_id ("SAMPLE");
return CMD_FAILURE;
}
- frac = tokval * 65536;
+ frac = tokval * UINT_MAX;
a = b = 0;
}
else
#if DEBUGGING
if (type == TYPE_FRACTION)
- printf ("SAMPLE %g.\n", frac / 65536.);
+ printf ("SAMPLE %g.\n", frac / (double) UINT_MAX);
else
printf ("SAMPLE %d FROM %d.\n", a, b);
#endif
struct sample_trns *t = (struct sample_trns *) trns;
double U;
- if (t->type == TYPE_FRACTION)
- return (rand_simple (0x10000) <= t->frac) - 2;
+ if (t->type == TYPE_FRACTION)
+ {
+ if (rng_get_unsigned (pspp_rng ()) <= t->frac)
+ return -1;
+ else
+ return -2;
+ }
if (t->m >= t->n)
return -2;
- U = rand_uniform (1);
+ U = rng_get_double (pspp_rng ());
if ((t->N - t->t) * U >= t->n - t->m)
{
t->t++;
#include <errno.h>
#include <float.h>
#include "alloc.h"
-#include "avl.h"
#include "error.h"
#include "file-handle.h"
#include "filename.h"
#include "format.h"
#include "getline.h"
+#include "hash.h"
#include "magic.h"
#include "misc.h"
#include "sfm.h"
#include "sfmP.h"
+#include "value-labels.h"
#include "str.h"
#include "var.h"
/* Utilities. */
/* bswap_int32(): Reverse the byte order of 32-bit integer *X. */
-#if __linux__
-#include <asm/byteorder.h>
-#include <netinet/in.h>
static inline void
-bswap_int32 (int32 * x)
+bswap_int32 (int32 *x)
{
- *x = ntohl (*x);
-}
-#else /* not Linux */
-static inline void
-bswap_int32 (int32 * x)
-{
- unsigned char *y = (char *) x;
+ unsigned char *y = (unsigned char *) x; /* Byte view of *X; bytes 0<->3 and 1<->2 are exchanged in place, so int32 is treated as exactly 4 bytes. */
unsigned char t;
+
t = y[0];
y[0] = y[3];
y[3] = t;
+
t = y[1];
y[1] = y[2];
y[2] = t;
}
-#endif /* not Linux */
/* Reverse the byte order of 64-bit floating point *X. */
static inline void
-bswap_flt64 (flt64 * x)
+bswap_flt64 (flt64 *x)
{
- /* Note that under compilers of any quality, half of this function
- should optimize out as dead code. */
- unsigned char *y = (char *) x;
+ unsigned char *y = (unsigned char *) x; /* Byte view of *X; the four swaps below mirror bytes 0..7, so flt64 is treated as exactly 8 bytes. */
+ unsigned char t;
- if (sizeof (flt64) == 8)
- {
- unsigned char t;
- t = y[0];
- y[0] = y[7];
- y[7] = t;
- t = y[1];
- y[1] = y[6];
- y[6] = t;
- t = y[2];
- y[2] = y[5];
- y[5] = t;
- t = y[3];
- y[3] = y[4];
- y[4] = t;
- }
- else
- {
- unsigned char t;
- size_t x;
+ t = y[0];
+ y[0] = y[7];
+ y[7] = t;
- for (x = 0; x < sizeof (flt64) / 2; x++)
- {
- t = y[x];
- y[x] = y[sizeof (flt64) - x];
- y[sizeof (flt64) - x] = t;
- }
- }
+ t = y[1];
+ y[1] = y[6];
+ y[6] = t;
+
+ t = y[2];
+ y[2] = y[5];
+ y[5] = t;
+
+ t = y[3];
+ y[3] = y[4];
+ y[4] = t;
}
static void
/* Create the dictionary. */
dict = ext->dict = xmalloc (sizeof *dict);
dict->var = NULL;
- dict->var_by_name = NULL;
+ dict->name_tab = NULL;
dict->nvar = 0;
dict->N = 0;
dict->nval = -1; /* Unknown. */
vv->index = dict->nvar - 1;
vv->foo = -1;
vv->label = NULL;
- vv->val_lab = NULL;
/* Copy first character of variable name. */
if (!isalpha ((unsigned char) sv.name[0])
long_string_count = vv->get.nv - 1;
}
vv->left = (vv->name[0] == '#');
+ vv->val_labs = val_labs_create (vv->width);
/* Get variable label, if any. */
if (sv.has_var_label == 1)
"%d were read from file."), h->fn, ext->case_size, next_value));
dict->var = xrealloc (dict->var, sizeof *dict->var * dict->nvar);
- /* Construct AVL tree of dictionary in order to speed up later
- processing and to check for duplicate varnames. */
- dict->var_by_name = avl_create (NULL, cmp_variable, NULL);
+ /* Construct hash table of dictionary in order to speed up
+ later processing and to check for duplicate varnames. */
+ dict->name_tab = hsh_create (8, compare_variables, hash_variable,
+ NULL, NULL);
for (i = 0; i < dict->nvar; i++)
- if (NULL != avl_insert (dict->var_by_name, dict->var[i]))
+ if (NULL != hsh_insert (dict->name_tab, dict->var[i]))
lose ((ME, _("%s: Duplicate variable name `%s' within system file."),
h->fn, dict->var[i]->name));
free (dict->var[i]);
}
free (dict->var);
- if (dict->var_by_name)
- avl_destroy (dict->var_by_name, NULL);
+ if (dict->name_tab)
+ hsh_destroy (dict->name_tab);
free (dict);
ext->dict = NULL;
{
struct sfm_fhuser_ext *ext = h->ext; /* File extension record. */
- flt64 *raw_label = NULL; /* Array of raw label values. */
- struct value_label **cooked_label = NULL; /* Array of cooked labels. */
+ struct label
+ {
+ unsigned char raw_value[8]; /* Value as uninterpreted bytes. */
+ union value value; /* Value. */
+ char *label; /* Null-terminated label string. */
+ };
+
+ struct label *labels = NULL;
int32 n_labels; /* Number of labels. */
struct variable **var = NULL; /* Associated variables. */
bswap_int32 (&n_labels);
/* Allocate memory. */
- raw_label = xmalloc (sizeof *raw_label * n_labels);
- cooked_label = xmalloc (sizeof *cooked_label * n_labels);
+ labels = xmalloc (n_labels * sizeof *labels);
for (i = 0; i < n_labels; i++)
- cooked_label[i] = NULL;
+ labels[i].label = NULL;
- /* Read each value/label tuple. */
+ /* Read each value/label tuple into labels[]. */
for (i = 0; i < n_labels; i++)
{
- flt64 value;
+ struct label *label = labels + i;
unsigned char label_len;
+ size_t padded_len;
- int rem;
-
- /* Read value, label length. */
- assertive_bufread (h, &value, sizeof value, 0);
- assertive_bufread (h, &label_len, 1, 0);
- memcpy (&raw_label[i], &value, sizeof value);
+ /* Read value. */
+ assertive_bufread (h, label->raw_value, sizeof label->raw_value, 0);
- /* Read label. */
- cooked_label[i] = xmalloc (sizeof **cooked_label);
- cooked_label[i]->s = xmalloc (label_len + 1);
- assertive_bufread (h, cooked_label[i]->s, label_len, 0);
- cooked_label[i]->s[label_len] = 0;
+ /* Read label length. */
+ assertive_bufread (h, &label_len, sizeof label_len, 0);
+ padded_len = ROUND_UP (label_len + 1, sizeof (flt64));
- /* Skip padding. */
- rem = REM_RND_UP (label_len + 1, sizeof (flt64));
- if (rem)
- assertive_bufread (h, &value, rem, 0);
+ /* Read label, padding. */
+ label->label = xmalloc (padded_len + 1);
+ assertive_bufread (h, label->label, padded_len - 1, 0);
+ label->label[label_len] = 0;
}
/* Second step: Read the type 4 record that has the list of
if (rec_type != 4)
lose ((ME, _("%s: Variable index record (type 4) does not immediately "
- "follow value label record (type 3) as it ought."), h->fn));
+ "follow value label record (type 3) as it should."), h->fn));
}
/* Read number of variables associated with value label from type 4
"is not between 1 and the number of variables (%d)."),
h->fn, n_vars, ext->dict->nvar));
- /* Allocate storage. */
- var = xmalloc (sizeof *var * n_vars);
-
/* Read the list of variables. */
+ var = xmalloc (n_vars * sizeof *var);
for (i = 0; i < n_vars; i++)
{
int32 var_index;
/* Make sure it's a real variable. */
v = var_by_index[var_index - 1];
if (v == NULL)
- lose ((ME, _("%s: Variable index associated with value label (%d) refers "
- "to a continuation of a string variable, not to an actual "
- "variable."), h->fn, var_index));
+ lose ((ME, _("%s: Variable index associated with value label (%d) "
+ "refers to a continuation of a string variable, not to "
+ "an actual variable."), h->fn, var_index));
if (v->type == ALPHA && v->width > MAX_SHORT_STRING)
- lose ((ME, _("%s: Value labels are not allowed on long string variables "
- "(%s)."), h->fn, v->name));
+ lose ((ME, _("%s: Value labels are not allowed on long string "
+ "variables (%s)."), h->fn, v->name));
/* Add it to the list of variables. */
var[i] = v;
var[0]->name, var[0]->type == ALPHA ? _("string") : _("numeric"),
var[i]->name, var[i]->type == ALPHA ? _("string") : _("numeric")));
- /* Create a value_label for each value/label tuple, now that we know
- the desired type. */
- for (i = 0; i < n_labels; i++)
+ /* Fill in labels[].value, now that we know the desired type. */
+ for (i = 0; i < n_labels; i++)
{
+ struct label *label = labels + i;
+
if (var[0]->type == ALPHA)
- {
- const int copy_len = min (sizeof (flt64), MAX_SHORT_STRING);
- memcpy (cooked_label[i]->v.s, (char *) &raw_label[i], copy_len);
- if (MAX_SHORT_STRING > copy_len)
- memset (&cooked_label[i]->v.s[copy_len], ' ',
- MAX_SHORT_STRING - copy_len);
- } else {
- cooked_label[i]->v.f = raw_label[i];
- if (ext->reverse_endian)
- bswap_flt64 (&cooked_label[i]->v.f);
- }
- cooked_label[i]->ref_count = n_vars;
+ {
+ const int copy_len = min (sizeof (label->raw_value),
+ sizeof (label->label));
+ memcpy (label->value.s, label->raw_value, copy_len);
+ } else {
+ flt64 f;
+ assert (sizeof f == sizeof label->raw_value);
+ memcpy (&f, label->raw_value, sizeof f);
+ if (ext->reverse_endian)
+ bswap_flt64 (&f);
+ label->value.f = f;
+ }
}
-
+
/* Assign the value_label's to each variable. */
for (i = 0; i < n_vars; i++)
{
struct variable *v = var[i];
int j;
- /* Create AVL tree if necessary. */
- if (!v->val_lab)
- v->val_lab = avl_create (NULL, val_lab_cmp, (void *) (v->width));
-
/* Add each label to the variable. */
for (j = 0; j < n_labels; j++)
{
- struct value_label *old = avl_replace (v->val_lab, cooked_label[j]);
- if (old == NULL)
+ struct label *label = labels + j;
+ if (!val_labs_replace (v->val_labs, label->value, label->label))
continue;
if (var[0]->type == NUMERIC)
msg (MW, _("%s: File contains duplicate label for value %g for "
- "variable %s."), h->fn, cooked_label[j]->v.f, v->name);
+ "variable %s."), h->fn, label->value.f, v->name);
else
msg (MW, _("%s: File contains duplicate label for value `%.*s' "
- "for variable %s."), h->fn, v->width,
- cooked_label[j]->v.s, v->name);
-
- free_value_label (old);
+ "for variable %s."),
+ h->fn, v->width, label->value.s, v->name);
}
}
- free (cooked_label);
- free (raw_label);
+ for (i = 0; i < n_labels; i++)
+ free (labels[i].label);
free (var);
return 1;
lossage:
- if (cooked_label)
- for (i = 0; i < n_labels; i++)
- if (cooked_label[i])
- {
- free (cooked_label[i]->s);
- free (cooked_label[i]);
- }
- free (raw_label);
+ if (labels)
+ {
+ for (i = 0; i < n_labels; i++)
+ free (labels[i].label);
+ free (labels);
+ }
free (var);
return 0;
}
#endif
#include "alloc.h"
#include "approx.h"
-#include "avl.h"
#include "error.h"
#include "file-handle.h"
#include "getline.h"
+#include "hash.h"
#include "magic.h"
#include "misc.h"
#include "sfm.h"
#include "sfmP.h"
#include "str.h"
+#include "value-labels.h"
#include "var.h"
#include "version.h"
int32 vars[1] P;
};
- avl_traverser i;
+ struct val_labs_iterator *i;
struct value_label_rec *vlr;
struct variable_index_rec vir;
- struct value_label *vl;
+ struct val_lab *vl;
size_t vlr_size;
flt64 *loc;
- avl_traverser_init (i);
- if (v->val_lab == NULL || avl_count (v->val_lab) == 0)
+ if (!val_labs_count (v->val_labs))
return 1;
/* Pass 1: Count bytes. */
vlr_size = (sizeof (struct value_label_rec)
- + sizeof (flt64) * (avl_count (v->val_lab) - 1));
- while (NULL != (vl = avl_traverse (v->val_lab, &i)))
- vlr_size += ROUND_UP (strlen (vl->s) + 1, sizeof (flt64));
+ + sizeof (flt64) * (val_labs_count (v->val_labs) - 1));
+ for (vl = val_labs_first (v->val_labs, &i); vl != NULL;
+ vl = val_labs_next (v->val_labs, &i))
+ vlr_size += ROUND_UP (strlen (vl->label) + 1, sizeof (flt64));
/* Pass 2: Copy bytes. */
- vlr = local_alloc (vlr_size);
+ vlr = xmalloc (vlr_size);
vlr->rec_type = 3;
- vlr->n_labels = avl_count (v->val_lab);
+ vlr->n_labels = val_labs_count (v->val_labs);
loc = vlr->labels;
- while (NULL != (vl = avl_traverse (v->val_lab, &i)))
+ for (vl = val_labs_first_sorted (v->val_labs, &i); vl != NULL;
+ vl = val_labs_next (v->val_labs, &i))
{
- int len = strlen (vl->s);
+ size_t len = strlen (vl->label);
- *loc++ = vl->v.f;
+ *loc++ = vl->value.f;
*(unsigned char *) loc = len;
- memcpy (&((unsigned char *) loc)[1], vl->s, len);
+ memcpy (&((unsigned char *) loc)[1], vl->label, len);
memset (&((unsigned char *) loc)[1 + len], ' ',
REM_RND_UP (len + 1, sizeof (flt64)));
loc += DIV_RND_UP (len + 1, sizeof (flt64));
if (!bufwrite (inf->h, vlr, vlr_size))
{
- local_free (vlr);
+ free (vlr);
return 0;
}
- local_free (vlr);
+ free (vlr);
vir.rec_type = 4;
vir.n_vars = 1;
#include <assert.h>
#include <ctype.h>
#include <stdlib.h>
+#include "algorithm.h"
#include "alloc.h"
-#include "avl.h"
#include "command.h"
#include "error.h"
#include "file-handle.h"
+#include "hash.h"
#include "lexer.h"
#include "misc.h"
#include "output.h"
#include "sfm.h"
#include "som.h"
#include "tab.h"
+#include "value-labels.h"
#include "var.h"
#include "vector.h"
tab_hline (t, TAL_2, 0, 3, 1);
for (r = 1, i = 0; i < d->nvar; i++)
{
- int nvl = d->var[i]->val_lab ? avl_count (d->var[i]->val_lab) : 0;
+ int nvl = val_labs_count (d->var[i]->val_labs);
if (r + 10 + nvl > nr)
{
static void display_variables (struct variable **, int, int);
static void display_vectors (int sorted);
-static int cmp_var_by_name (const void *, const void *);
-
int
cmd_display (void)
{
}
if (sorted)
- qsort (vl, n, sizeof *vl, cmp_var_by_name);
+ sort (vl, n, sizeof *vl, compare_variables, NULL);
display_variables (vl, n, as);
return lex_end_of_command ();
}
-static int
-cmp_var_by_name (const void *a, const void *b)
-{
- return strcmp ((*((struct variable **) a))->name, (*((struct variable **) b))->name);
-}
-
static void
display_macros (void)
{
if (as == AS_DICTIONARY || as == AS_VARIABLES)
{
- int nvl = v->val_lab ? avl_count (v->val_lab) : 0;
+ int nvl = val_labs_count (v->val_labs);
if (r + 10 + nvl > nr)
{
}
/* Value labels. */
- if (as == AS_DICTIONARY && v->val_lab)
+ if (as == AS_DICTIONARY && val_labs_count (v->val_labs))
{
- avl_traverser trav;
- struct value_label *vl;
- int nvl = avl_count (v->val_lab);
+ struct val_labs_iterator *i;
+ struct val_lab *vl;
int orig_r = r;
- int i;
#if 0
tab_text (t, 1, r, TAB_LEFT, _("Value"));
#endif
tab_hline (t, TAL_1, 1, 2, r);
- avl_traverser_init (trav);
- for (i = 1, vl = avl_traverse (v->val_lab, &trav); vl;
- i++, vl = avl_traverse (v->val_lab, &trav))
- {
+ for (vl = val_labs_first_sorted (v->val_labs, &i); vl != NULL;
+ vl = val_labs_next (v->val_labs, &i))
+ {
char buf[128];
if (v->type == ALPHA)
{
- memcpy (buf, vl->v.s, v->width);
+ memcpy (buf, vl->value.s, v->width);
buf[v->width] = 0;
}
else
- sprintf (buf, "%g", vl->v.f);
+ sprintf (buf, "%g", vl->value.f);
tab_text (t, 1, r, TAB_NONE, buf);
- tab_text (t, 2, r, TAB_LEFT, vl->s);
+ tab_text (t, 2, r, TAB_LEFT, vl->label);
r++;
-
- if (i == nvl)
- break;
- }
-
- for (;;)
- {
- if (vl == NULL)
- break;
- vl = avl_traverse (v->val_lab, &trav);
}
tab_vline (t, TAL_1, 2, orig_r, r - 1);
#include "lexer.h"
#include "error.h"
#include "magic.h"
+#include "value-labels.h"
#include "var.h"
#include "vfm.h"
printf ("-----------------------------------------------------------\n");
printf (" %s %s\n\n", cmd.v_variables[cur_var]->name, cmd.v_variables[cur_var]->label);
printf ("%s %8.4f %8.0f %8.4f %8.3f %8.3f\n",
- get_val_lab (grps, *g1, 0), g1->f, n1, mean1, sd1, se1);
+ val_labs_find (grps->val_labs, *g1), g1->f, n1, mean1, sd1, se1);
printf ("%s %8.4f %8.0f %8.4f %8.3f %8.3f\n",
- get_val_lab (grps, *g2, 0), g2->f, n2, mean2, sd2, se2);
+ val_labs_find (grps->val_labs, *g2), g2->f, n2, mean2, sd2, se2);
printf ("-----------------------------------------------------------\n");
printf ("\n Mean Difference = %8.4f\n", diff);
printf ("\n Levene's Test for Equality of Variances: F= %.3f P= %.3f\n",
#include <stddef.h>
#include <stdlib.h>
#include "alloc.h"
-#include "avl.h"
#include "command.h"
#include "do-ifP.h"
#include "error.h"
+#include "hash.h"
#include "lexer.h"
#include "str.h"
+#include "value-labels.h"
#include "var.h"
#include "debug-print.h"
struct dictionary *temp_dict;
int temp_trns;
-#if 0
-/* Displays all the value labels in TREE, with label S. */
-void
-display_tree (char *s, avl_tree *tree)
-{
- value_label *iter;
- avl_traverser *trav = NULL;
-
- printf("%s tree:\n", s);
- fflush(stdout);
- while ((iter = avl_traverse (tree, &trav)) != NULL)
- printf (" %g: %s\n", iter->v.f, iter->s);
-}
-#endif
-
/* Parses the TEMPORARY command. */
int
cmd_temporary (void)
dest->print = src->print;
dest->write = src->write;
- dest->val_lab = copy_value_labels (src->val_lab);
+ dest->val_labs = val_labs_copy (src->val_labs);
dest->label = src->label ? xstrdup (src->label) : NULL;
}
struct dictionary *d = xmalloc (sizeof *d);
d->var = NULL;
- d->var_by_name = avl_create (NULL, cmp_variable, NULL);
+ d->name_tab = hsh_create (8, compare_variables, hash_variable, NULL, NULL);
d->nvar = 0;
d->N = 0;
else d->documents = NULL;
/* Then the variables. */
- d->var_by_name = avl_create (NULL, cmp_variable, NULL);
+ d->name_tab = hsh_create (8, compare_variables, hash_variable, NULL, NULL);
d->var = xmalloc (default_dict.nvar * sizeof *d->var);
for (i = 0; i < default_dict.nvar; i++)
{
copy_variable (d->var[i], default_dict.var[i]);
strcpy (d->var[i]->name, default_dict.var[i]->name);
d->var[i]->index = i;
- avl_force_insert (d->var_by_name, d->var[i]);
+ hsh_force_insert (d->name_tab, d->var[i]);
}
/* Then the SPLIT FILE variables. */
free (default_dict.splits);
default_dict.splits = NULL;
- avl_destroy (default_dict.var_by_name, NULL);
- default_dict.var_by_name = NULL;
+ hsh_destroy (default_dict.name_tab);
+ default_dict.name_tab = NULL;
for (i = 0; i < default_dict.nvar; i++)
{
/* 2. Copy dictionary D into the active file dictionary. */
default_dict = *d;
- if (!default_dict.var_by_name)
+ if (default_dict.name_tab == NULL)
{
- default_dict.var_by_name = avl_create (NULL, cmp_variable, NULL);
+ default_dict.name_tab = hsh_create (8, compare_variables, hash_variable,
+ NULL, NULL);
for (i = 0; i < default_dict.nvar; i++)
- avl_force_insert (default_dict.var_by_name, default_dict.var[i]);
+ hsh_force_insert (default_dict.name_tab, default_dict.var[i]);
}
/* 3. Destroy dictionary D. */
free (d->splits);
d->splits = NULL;
- if (d->var_by_name)
- avl_destroy (d->var_by_name, NULL);
+ if (d->name_tab)
+ hsh_destroy (d->name_tab);
for (i = 0; i < d->nvar; i++)
{
struct variable *v = d->var[i];
- if (v->val_lab)
- {
- avl_destroy (v->val_lab, free_val_lab);
- v->val_lab = NULL;
- }
+ val_labs_destroy (v->val_labs);
if (v->label)
{
free (v->label);
#include <stdio.h>
#include <stdlib.h>
#include "alloc.h"
-#include "avl.h"
#include "command.h"
#include "error.h"
+#include "hash.h"
#include "lexer.h"
#include "str.h"
+#include "value-labels.h"
#include "var.h"
\f
/* Declarations. */
return 0;
}
- if (erase && v[i]->val_lab)
- {
- avl_destroy (vp->val_lab, free_val_lab);
- vp->val_lab = NULL;
- }
+ if (erase)
+ val_labs_clear (vp->val_labs);
}
return 1;
}
/* Parse all the labels and add them to the variables. */
do
{
- struct value_label *label;
-
- /* Allocate label. */
- label = xmalloc (sizeof *label);
- label->ref_count = nv;
+ union value value;
+ char *label;
- /* Set label->v. */
+ /* Set value. */
if (v[0]->type == ALPHA)
{
if (token != T_STRING)
msg (SE, _("String expected for value."));
return 0;
}
- st_bare_pad_copy (label->v.s, ds_value (&tokstr), MAX_SHORT_STRING);
+ st_bare_pad_copy (value.s, ds_value (&tokstr), MAX_SHORT_STRING);
}
else
{
}
if (!lex_integer_p ())
msg (SW, _("Value label `%g' is not integer."), tokval);
- label->v.f = tokval;
+ value.f = tokval;
}
- /* Set label->s. */
+ /* Set label. */
lex_get ();
if (!lex_force_string ())
return 0;
msg (SW, _("Truncating value label to 60 characters."));
ds_truncate (&tokstr, 60);
}
- label->s = xstrdup (ds_value (&tokstr));
+ label = ds_value (&tokstr);
for (i = 0; i < nv; i++)
- {
- if (!v[i]->val_lab)
- v[i]->val_lab = avl_create (NULL, val_lab_cmp,
- (void *) (v[i]->width));
-
- {
- struct value_label *old;
-
- old = avl_replace (v[i]->val_lab, label);
- if (old)
- free_value_label (old);
- }
- }
+ val_labs_replace (v[i]->val_labs, value, label);
lex_get ();
}
puts (_("Value labels:"));
for (i = 0; i < nvar; i++)
{
- AVLtraverser *t = NULL;
+ struct hsh_iterator i;
struct value_label *val;
printf (" %s\n", var[i]->name);
- if (var[i]->val_lab)
- if (var[i]->type == NUMERIC)
- for (val = avltrav (var[i]->val_lab, &t);
- val; val = avltrav (var[i]->val_lab, &t))
- printf (" %g: `%s'\n", val->v.f, val->s);
- else
- for (val = avltrav (var[i]->val_lab, &t);
- val; val = avltrav (var[i]->val_lab, &t))
- printf (" `%.8s': `%s'\n", val->v.s, val->s);
+ if (var[i]->val_lab)
+ {
+ for (val = hsh_first (var[i]->val_lab, &i); val != NULL;
+ val = hsh_next (var[i]->val_lab, &i))
+ if (var[i]->type == NUMERIC)
+ printf (" %g: `%s'\n", val->v.f, val->s);
+ else
+ printf (" `%.8s': `%s'\n", val->v.s, val->s);
+ }
else
printf (_(" (no value labels)\n"));
}
}
#endif /* DEBUGGING */
-
-/* Compares two value labels and returns a strcmp()-type result. */
-int
-val_lab_cmp (const void *a, const void *b, void *param)
-{
- if ((int) param)
- return strncmp (((struct value_label *) a)->v.s,
- ((struct value_label *) b)->v.s,
- (int) param);
- else
- {
- int temp = (((struct value_label *) a)->v.f
- - ((struct value_label *) b)->v.f);
- if (temp > 0)
- return 1;
- else if (temp < 0)
- return -1;
- else
- return 0;
- }
-}
-
-/* Callback function to increment the reference count for a value
- label. */
-void *
-inc_ref_count (void *pv, void *param unused)
-{
- ((struct value_label *) pv)->ref_count++;
- return pv;
-}
-
-/* Copy the avl tree of value labels and return a pointer to the
- copy. */
-avl_tree *
-copy_value_labels (avl_tree *src)
-{
- avl_tree *dest;
-
- if (src == NULL)
- return NULL;
- dest = avl_copy (NULL, src, inc_ref_count);
-
- return dest;
-}
--- /dev/null
+/* PSPP - computes sample statistics.
+ Copyright (C) 1997-9, 2000 Free Software Foundation, Inc.
+ Written by Ben Pfaff <blp@gnu.org>.
+
+ This program is free software; you can redistribute it and/or
+ modify it under the terms of the GNU General Public License as
+ published by the Free Software Foundation; either version 2 of the
+ License, or (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful, but
+ WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program; if not, write to the Free Software
+ Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA
+ 02111-1307, USA. */
+
+#include <config.h>
+#include <assert.h>
+#include <stdlib.h>
+#include <string.h>
+#include "alloc.h"
+#include "hash.h"
+#include "value-labels.h"
+
+static hsh_compare_func compare_int_val_lab;
+static hsh_hash_func hash_int_val_lab;
+static hsh_free_func free_int_val_lab;
+
+struct atom;
+static struct atom *atom_create (const char *string);
+static void atom_destroy (struct atom *);
+static const char *atom_to_string (const struct atom *);
+
+/* A set of value labels, all for values of one width.  The hash
+   table is allocated lazily: val_labs_create() leaves LABELS
+   null, val_labs_add() creates the table on first use, and
+   val_labs_clear() resets it to null. */
+struct val_labs
+ {
+ int width; /* 0=numeric, otherwise string width. */
+ struct hsh_table *labels; /* Hash table of `struct int_val_lab's; null until a table has been created. */
+ };
+
+/* Allocates and returns a new value label set for values of
+   the given WIDTH: 0 for numeric values, otherwise a string
+   width.  WIDTH must not be negative.  The new set is empty and
+   has no hash table yet.  (The functions that add, replace, or
+   remove labels additionally require a width of at most
+   MAX_SHORT_STRING.) */
+struct val_labs *
+val_labs_create (int width)
+{
+  struct val_labs *new_set;
+
+  assert (width >= 0);
+
+  new_set = xmalloc (sizeof *new_set);
+  new_set->labels = NULL;
+  new_set->width = width;
+  return new_set;
+}
+
+/* Returns a newly created set of value labels that has the same
+   width as VLS and a label for every value that VLS labels.
+   VLS must not be null. */
+struct val_labs *
+val_labs_copy (const struct val_labs *vls)
+{
+  struct val_labs_iterator *iter;
+  struct val_labs *duplicate;
+  struct val_lab *src;
+
+  assert (vls != NULL);
+
+  duplicate = val_labs_create (vls->width);
+
+  /* Walk VLS in arbitrary order, re-adding each label. */
+  src = val_labs_first (vls, &iter);
+  while (src != NULL)
+    {
+      val_labs_add (duplicate, src->value, src->label);
+      src = val_labs_next (vls, &iter);
+    }
+
+  return duplicate;
+}
+
+/* Sets the width of VLS to NEW_WIDTH.  A numeric set (width 0)
+   must stay numeric and a string set must stay a string set;
+   for strings NEW_WIDTH is expected to lie in
+   1...MAX_SHORT_STRING inclusive. */
+void
+val_labs_set_width (struct val_labs *vls, int new_width)
+{
+  assert (vls != NULL);
+
+  /* Both widths must be zero or both must be nonzero. */
+  assert (!vls->width == !new_width);
+
+  vls->width = new_width;
+}
+
+/* Destroys VLS: frees its hash table of labels, if one was ever
+   created, and then the set structure itself, which
+   val_labs_create() obtained from xmalloc().  A null VLS is
+   silently ignored. */
+void
+val_labs_destroy (struct val_labs *vls)
+{
+  if (vls == NULL)
+    return;
+
+  if (vls->labels != NULL)
+    hsh_destroy (vls->labels);
+
+  /* Without this the structure itself would leak on every
+     destroy. */
+  free (vls);
+}
+
+/* Removes all the value labels from VLS, leaving it empty but
+   still usable.  VLS must not be null. */
+void
+val_labs_clear (struct val_labs *vls)
+{
+  assert (vls != NULL);
+
+  /* The table is only created when the first label is added, so
+     a set that never had labels has nothing to destroy.  Guard
+     the call the same way val_labs_destroy() does rather than
+     handing hsh_destroy() a null table. */
+  if (vls->labels != NULL)
+    {
+      hsh_destroy (vls->labels);
+      vls->labels = NULL;
+    }
+}
+
+/* Returns how many value labels VLS holds.  A set whose hash
+   table has not been created counts as holding none.  VLS must
+   not be null. */
+size_t
+val_labs_count (struct val_labs *vls)
+{
+  assert (vls != NULL);
+
+  return vls->labels != NULL ? hsh_count (vls->labels) : 0;
+}
+\f
+/* One value label in internal format, as stored in the hash
+   table of a `struct val_labs'.  Entries are built by
+   create_int_val_lab() and released by free_int_val_lab(). */
+struct int_val_lab
+ {
+ union value value; /* The value being labeled; string values are space-padded by create_int_val_lab(). */
+ struct atom *label; /* A ref-counted string obtained from atom_create(). */
+ };
+
+/* Allocates and returns an int_val_lab that pairs VALUE with an
+   atom for LABEL.  For a string set, the bytes of the stored
+   value beyond VLS's width are overwritten with spaces up to
+   MAX_SHORT_STRING.  LABEL must not be null and VLS's width
+   must not exceed MAX_SHORT_STRING. */
+static struct int_val_lab *
+create_int_val_lab (struct val_labs *vls, union value value, const char *label)
+{
+  struct int_val_lab *entry;
+
+  assert (label != NULL);
+  assert (vls->width <= MAX_SHORT_STRING);
+
+  entry = xmalloc (sizeof *entry);
+  entry->label = atom_create (label);
+  entry->value = value;
+
+  /* Numeric values (width 0) are stored untouched. */
+  if (vls->width > 0)
+    memset (entry->value.s + vls->width, ' ',
+            MAX_SHORT_STRING - vls->width);
+
+  return entry;
+}
+
+/* Adds LABEL as the value label for VALUE in VLS, but only if
+   VALUE has no label yet.  Returns nonzero if the label was
+   added, zero if VALUE was already labeled (in which case VLS
+   is left unchanged).  Behavior is undefined if VLS's width is
+   greater than MAX_SHORT_STRING. */
+int
+val_labs_add (struct val_labs *vls, union value value, const char *label)
+{
+  struct int_val_lab *candidate;
+  void **slot;
+
+  assert (vls != NULL);
+  assert (vls->width <= MAX_SHORT_STRING);
+  assert (label != NULL);
+
+  /* The hash table is created on first use. */
+  if (vls->labels == NULL)
+    vls->labels = hsh_create (8, compare_int_val_lab, hash_int_val_lab,
+                              free_int_val_lab, vls);
+
+  candidate = create_int_val_lab (vls, value, label);
+  slot = hsh_probe (vls->labels, candidate);
+  if (*slot != NULL)
+    {
+      /* VALUE is already labeled: discard the new entry. */
+      free_int_val_lab (candidate, vls);
+      return 0;
+    }
+
+  *slot = candidate;
+  return 1;
+}
+
+/* Makes LABEL the value label for VALUE in VLS, superseding any
+   label VALUE already had.  Returns nonzero if an existing
+   label was replaced, zero if VALUE was previously unlabeled.
+   Behavior is undefined if VLS's width is greater than
+   MAX_SHORT_STRING. */
+int
+val_labs_replace (struct val_labs *vls, union value value, const char *label)
+{
+  assert (vls != NULL);
+  assert (vls->width <= MAX_SHORT_STRING);
+  assert (label != NULL);
+
+  if (vls->labels != NULL)
+    {
+      struct int_val_lab *fresh = create_int_val_lab (vls, value, label);
+      struct int_val_lab *displaced = hsh_replace (vls->labels, fresh);
+
+      if (displaced != NULL)
+        {
+          free_int_val_lab (displaced, vls);
+          return 1;
+        }
+    }
+  else
+    {
+      /* No table yet, so there cannot be an old label; let
+         val_labs_add() create the table and insert. */
+      val_labs_add (vls, value, label);
+    }
+
+  return 0;
+}
+
+/* Removes any value label for VALUE within VLS.  Returns nonzero
+   if a value label was removed, zero if VALUE had no label.
+   Behavior is undefined if VLS's width is greater than
+   MAX_SHORT_STRING. */
+int
+val_labs_remove (struct val_labs *vls, union value value)
+{
+  struct int_val_lab *key;
+  int deleted;
+
+  assert (vls != NULL);
+  assert (vls->width <= MAX_SHORT_STRING);
+
+  if (vls->labels == NULL)
+    return 0;
+
+  /* Build a temporary entry so that VALUE is padded exactly like
+     the stored entries before it is hashed and compared. */
+  key = create_int_val_lab (vls, value, "");
+
+  /* The search key is the entry itself, as with hsh_probe() and
+     hsh_find() elsewhere in this file, not the address of the
+     local pointer variable. */
+  deleted = hsh_delete (vls->labels, key);
+
+  /* create_int_val_lab() took an atom reference for the dummy
+     label; free_int_val_lab() drops it along with the entry,
+     which a bare free() would not. */
+  free_int_val_lab (key, vls);
+
+  return deleted;
+}
+
+/* Searches VLS for a value label for VALUE.  If successful,
+   returns the label; otherwise, returns a null pointer.  If
+   VLS's width is greater than MAX_SHORT_STRING, always returns a
+   null pointer.
+
+   For string sets only the first VLS->width bytes of VALUE are
+   significant; the remainder is normalized before the lookup. */
+const char *
+val_labs_find (const struct val_labs *vls, union value value)
+{
+  assert (vls != NULL);
+
+  if (vls->width > MAX_SHORT_STRING)
+    return NULL;
+
+  if (vls->labels != NULL)
+    {
+      struct int_val_lab ivl, *vlp;
+
+      ivl.value = value;
+
+      /* Stored entries are space-padded past the set's width by
+         create_int_val_lab(), and hash_int_val_lab() hashes all
+         of value.s.  Pad the key the same way, or a caller's
+         stray trailing bytes would send the lookup to the wrong
+         bucket. */
+      if (vls->width > 0)
+        memset (ivl.value.s + vls->width, ' ',
+                MAX_SHORT_STRING - vls->width);
+
+      /* The label is not consulted by the hash or comparison
+         functions; it is set only so that the key is fully
+         initialized. */
+      ivl.label = NULL;
+
+      vlp = hsh_find (vls->labels, &ivl);
+      if (vlp != NULL)
+        return atom_to_string (vlp->label);
+    }
+  return NULL;
+}
+\f
+/* A value labels iterator.  It is allocated by val_labs_first()
+   or val_labs_first_sorted() and freed either by val_labs_next()
+   when it reads a null element from LABELS, or by
+   val_labs_done(). */
+struct val_labs_iterator
+ {
+ void **labels; /* The labels, in order; array obtained from hsh_data_copy() or hsh_sort_copy() and freed with the iterator. */
+ void **lp; /* Current label: next element of LABELS to hand out. */
+ struct val_lab vl; /* Structure presented to caller; overwritten on each step. */
+ };
+
+/* Begins an iteration over the value labels in VLS, in no
+   particular order, storing the iterator state in *IP.  Returns
+   the first value label, or a null pointer if VLS has no label
+   table or its width exceeds MAX_SHORT_STRING.  After a
+   non-null return, continue with val_labs_next() or abandon the
+   iteration with val_labs_done(); after a null return neither
+   may be called for *IP. */
+struct val_lab *
+val_labs_first (const struct val_labs *vls, struct val_labs_iterator **ip)
+{
+  assert (vls != NULL);
+  assert (ip != NULL);
+
+  if (vls->labels != NULL && vls->width <= MAX_SHORT_STRING)
+    {
+      struct val_labs_iterator *iter = xmalloc (sizeof *iter);
+
+      *ip = iter;
+      iter->labels = hsh_data_copy (vls->labels);
+      iter->lp = iter->labels;
+      return val_labs_next (vls, ip);
+    }
+
+  return NULL;
+}
+
+/* Begins an iteration over the value labels in VLS, using the
+   ordering produced by hsh_sort_copy() (sorted by value), and
+   stores the iterator state in *IP.  Returns the first value
+   label, or a null pointer if VLS has no label table or its
+   width exceeds MAX_SHORT_STRING.  After a non-null return,
+   continue with val_labs_next() or abandon the iteration with
+   val_labs_done(); after a null return neither may be called
+   for *IP. */
+struct val_lab *
+val_labs_first_sorted (const struct val_labs *vls,
+                       struct val_labs_iterator **ip)
+{
+  assert (vls != NULL);
+  assert (ip != NULL);
+
+  if (vls->labels != NULL && vls->width <= MAX_SHORT_STRING)
+    {
+      struct val_labs_iterator *iter = xmalloc (sizeof *iter);
+
+      *ip = iter;
+      iter->labels = hsh_sort_copy (vls->labels);
+      iter->lp = iter->labels;
+      return val_labs_next (vls, ip);
+    }
+
+  return NULL;
+}
+
+/* Advances an iteration started by val_labs_first() or
+   val_labs_first_sorted() and returns the next value label.
+   The returned structure lives inside the iterator and is
+   overwritten by the following call.  When a null element is
+   read from the label array, the iterator is freed, *IP is set
+   to null, and a null pointer is returned; after that neither
+   val_labs_next() nor val_labs_done() may be called for *IP. */
+struct val_lab *
+val_labs_next (const struct val_labs *vls, struct val_labs_iterator **ip)
+{
+  struct val_labs_iterator *iter;
+  struct int_val_lab *current;
+
+  assert (vls != NULL);
+  assert (vls->width <= MAX_SHORT_STRING);
+  assert (ip != NULL);
+  assert (*ip != NULL);
+
+  iter = *ip;
+  current = *iter->lp;
+  iter->lp++;
+
+  if (current == NULL)
+    {
+      /* End of the array: tear the iterator down. */
+      free (iter->labels);
+      free (iter);
+      *ip = NULL;
+      return NULL;
+    }
+
+  iter->vl.value = current->value;
+  iter->vl.label = atom_to_string (current->label);
+  return &iter->vl;
+}
+
+/* Abandons an iteration begun by val_labs_first() or
+   val_labs_first_sorted() before it has run to completion:
+   frees the iterator and its label array and sets *IP to
+   null.  Neither IP nor *IP may be null. */
+void
+val_labs_done (struct val_labs_iterator **ip)
+{
+  struct val_labs_iterator *iter;
+
+  assert (ip != NULL);
+  assert (*ip != NULL);
+
+  iter = *ip;
+  *ip = NULL;
+  free (iter->labels);
+  free (iter);
+}
+\f
+/* Compares value labels A_ and B_ by their values and returns a
+   strcmp()-type result.  VLS_ is the struct val_labs whose
+   width selects the comparison: a width of 0 compares the
+   numeric member `f', any other width compares that many bytes
+   of the string member `s'. */
+int
+compare_int_val_lab (const void *a_, const void *b_, void *vls_)
+{
+  const struct int_val_lab *lhs = a_;
+  const struct int_val_lab *rhs = b_;
+  const struct val_labs *owner = vls_;
+
+  if (owner->width != 0)
+    return memcmp (lhs->value.s, rhs->value.s, owner->width);
+
+  if (lhs->value.f < rhs->value.f)
+    return -1;
+  return lhs->value.f > rhs->value.f;
+}
+
+/* Hashes value label VL_ by its value.  VLS_ is the struct
+   val_labs whose width selects what is hashed: a width of 0
+   hashes the numeric member `f', any other width hashes that
+   many bytes of the string member `s'.
+
+   Only the first WIDTH bytes of a string value are hashed
+   because compare_int_val_lab() compares only that many.
+   Hashing the whole `s' array would let two labels that compare
+   equal hash differently whenever the bytes past WIDTH differ,
+   which would make hash table lookups miss. */
+unsigned
+hash_int_val_lab (const void *vl_, void *vls_)
+{
+  const struct int_val_lab *vl = vl_;
+  const struct val_labs *vls = vls_;
+
+  if (vls->width == 0)
+    return hsh_hash_double (vl->value.f);
+  else
+    return hsh_hash_bytes (vl->value.s, vls->width);
+}
+
+/* Frees value label VL_: drops its reference to the label atom
+   and then releases the structure itself.  VLS_ is ignored. */
+void
+free_int_val_lab (void *vl_, void *vls_ unused)
+{
+  struct int_val_lab *doomed = vl_;
+
+  atom_destroy (doomed->label);
+  free (doomed);
+}
+\f
+/* Atoms. */
+
+/* An atom: a string stored once in the `atoms' hash table and
+ shared among its users through a reference count. */
+struct atom
+ {
+ char *string; /* String value, an xstrdup()'d copy freed by free_atom(). */
+ unsigned ref_count; /* Number of references; atom_destroy() deletes the atom at zero. */
+ };
+
+/* Callbacks passed to hsh_create() for the atoms table. */
+static hsh_compare_func compare_atoms;
+static hsh_hash_func hash_atom;
+static hsh_free_func free_atom;
+
+/* Hash table of atoms, compared and hashed by their strings.
+ Null until atom_create() creates it on first use. */
+static struct hsh_table *atoms;
+
+/* Returns the atom for STRING.  If the atoms table already
+   holds an atom with the same string, its reference count is
+   incremented and it is returned; otherwise a new atom holding
+   a copy of STRING is inserted with a reference count of 1.
+   The atoms table itself is created on the first call. */
+static struct atom *
+atom_create (const char *string)
+{
+  struct atom key;
+  struct atom *found;
+  void **slot;
+
+  assert (string != NULL);
+
+  if (atoms == NULL)
+    atoms = hsh_create (8, compare_atoms, hash_atom, free_atom, NULL);
+
+  /* The key is a stack atom that borrows STRING; hash_atom() and
+     compare_atoms() look only at the string member. */
+  key.string = (char *) string;
+  slot = hsh_probe (atoms, &key);
+
+  found = *slot;
+  if (found == NULL)
+    {
+      found = xmalloc (sizeof *found);
+      found->string = xstrdup (string);
+      found->ref_count = 1;
+      *slot = found;
+    }
+  else
+    found->ref_count++;
+
+  return found;
+}
+
+/* Releases one reference to ATOM.  When the last reference is
+   dropped, ATOM is deleted from the atoms table.  A null ATOM
+   is accepted and ignored. */
+static void
+atom_destroy (struct atom *atom)
+{
+  if (atom == NULL)
+    return;
+
+  assert (atom->ref_count > 0);
+  if (--atom->ref_count == 0)
+    hsh_force_delete (atoms, atom);
+}
+
+/* Returns the string held by ATOM, which must not be null.  The
+   pointer returned is the atom's own string member, not a
+   copy. */
+static const char *
+atom_to_string (const struct atom *atom)
+{
+  const char *text;
+
+  assert (atom != NULL);
+
+  text = atom->string;
+  return text;
+}
+
+/* A hsh_compare_func that orders atoms A_ and B_ by comparing
+   their strings with strcmp().  AUX is ignored. */
+static int
+compare_atoms (const void *a_, const void *b_, void *aux unused)
+{
+  const struct atom *lhs = a_;
+  const struct atom *rhs = b_;
+  const char *lhs_text = lhs->string;
+  const char *rhs_text = rhs->string;
+
+  return strcmp (lhs_text, rhs_text);
+}
+
+/* A hsh_hash_func that hashes atom ATOM_ by its string.  AUX is
+   ignored. */
+static unsigned
+hash_atom (const void *atom_, void *aux unused)
+{
+  const struct atom *subject = atom_;
+  const char *text = subject->string;
+
+  return hsh_hash_string (text);
+}
+
+/* A hsh_free_func that frees atom ATOM_: first the string it
+   owns, then the atom structure.  AUX is ignored. */
+static void
+free_atom (void *atom_, void *aux unused)
+{
+  struct atom *doomed = atom_;
+
+  free (doomed->string);
+  free (doomed);
+}
--- /dev/null
+/* PSPP - computes sample statistics.
+ Copyright (C) 1997-9, 2000 Free Software Foundation, Inc.
+ Written by Ben Pfaff <blp@gnu.org>.
+
+ This program is free software; you can redistribute it and/or
+ modify it under the terms of the GNU General Public License as
+ published by the Free Software Foundation; either version 2 of the
+ License, or (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful, but
+ WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program; if not, write to the Free Software
+ Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA
+ 02111-1307, USA. */
+
+#ifndef VAL_LABS_H
+#define VAL_LABS_H 1
+
+#include <stddef.h>
+#include "var.h"
+
+struct val_labs;
+
+/* One value label, in the form returned by val_labs_first(),
+ val_labs_first_sorted(), and val_labs_next(). */
+struct val_lab
+ {
+ union value value; /* The value being labeled. */
+ const char *label; /* The label's text. */
+ };
+
+struct val_labs *val_labs_create (int width);
+struct val_labs *val_labs_copy (const struct val_labs *);
+void val_labs_set_width (struct val_labs *, int new_width);
+void val_labs_destroy (struct val_labs *);
+void val_labs_clear (struct val_labs *);
+size_t val_labs_count (struct val_labs *);
+
+int val_labs_add (struct val_labs *, union value, const char *);
+int val_labs_replace (struct val_labs *, union value, const char *);
+int val_labs_remove (struct val_labs *, union value);
+const char *val_labs_find (const struct val_labs *, union value);
+
+struct val_labs_iterator;
+
+struct val_lab *val_labs_first (const struct val_labs *,
+ struct val_labs_iterator **);
+struct val_lab *val_labs_first_sorted (const struct val_labs *,
+ struct val_labs_iterator **);
+struct val_lab *val_labs_next (const struct val_labs *,
+ struct val_labs_iterator **);
+void val_labs_done (struct val_labs_iterator **);
+
+#endif /* value-labels.h */
/* Sometimes we insert value's in a hash table. */
unsigned long hash[SIZEOF_DOUBLE / SIZEOF_LONG];
};
-
-/* Describes one value label. */
-struct value_label
- {
- union value v; /* The value being labeled. */
- char *s; /* Pointer to malloc()'d label. */
- int ref_count; /* Reference count. */
- };
\f
/* Frequency tables. */
int mode; /* FRQM_GENERAL or FRQM_INTEGER. */
/* General mode. */
- struct avl_tree *tree; /* Undifferentiated data. */
+ struct hsh_table *data; /* Undifferentiated data. */
/* Integer mode. */
double *vector; /* Frequencies proper. */
double sysmis; /* Sum of weights of SYSMIS values. */
/* All modes. */
- struct freq *valid; /* Valid freqs. */
+ struct freq *valid; /* Valid freqs. */
int n_valid; /* Number of total freqs. */
struct freq *missing; /* Missing freqs. */
struct fmt_spec write; /* Default format for WRITE. */
/* Labels. */
- struct avl_tree *val_lab; /* Avltree of value_label structures. */
+ struct val_labs *val_labs;
char *label; /* Variable label. */
/* Per-procedure info. */
}
p;
};
+
+int compare_variables (const void *, const void *, void *);
+unsigned hash_variable (const void *, void *);
\f
/* Cases. */
struct dictionary
{
struct variable **var; /* Variable descriptions. */
- struct avl_tree *var_by_name; /* Variables arranged by name. */
+ struct hsh_table *name_tab; /* Variables arranged by name. */
int nvar; /* Number of variables. */
int N; /* Current case limit (N command). */
void fill_all_vars (struct variable ***, int *, int flags);
-int val_lab_cmp (const void *, const void *, void *);
-char *get_val_lab (const struct variable *, union value, int);
-void free_val_lab (void *, void *);
-void free_value_label (struct value_label *);
-struct avl_tree *copy_value_labels (struct avl_tree *);
-
void dump_split_vars (const struct ccase *);
int is_num_user_missing (double, const struct variable *);
int is_user_missing (const union value *, const struct variable *);
void copy_missing_values (struct variable *dest, const struct variable *src);
-int cmp_variable (const void *, const void *, void *);
-
#if GLOBAL_DEBUGGING
struct variable *force_create_variable (struct dictionary *, const char *name,
int type, int width);
#include <stdlib.h>
#include "alloc.h"
#include "approx.h"
-#include "avl.h"
#include "command.h"
#include "do-ifP.h"
#include "expr.h"
#include "file-handle.h"
+#include "hash.h"
#include "inpt-pgm.h"
#include "misc.h"
#include "str.h"
#include "var.h"
#include "vector.h"
+#include "value-labels.h"
#include "vfm.h"
#include "debug-print.h"
-#if DEBUGGING
-/* Dumps one variable to standard output. */
-void
-dump_one_var_node (void * pnode, void *param, int level)
-{
- struct variable *node = pnode;
- int i;
-
- for (i = 0; i < level - 1; i++)
- printf (" ");
- if (node == NULL)
- printf ("NULL_TREE\n");
- else
- printf ("%p=>%s\n", node, node->name ? node->name : "<null>");
-}
-
-/* Dumps a tree of the variables to standard output. */
-void
-dump_var_tree (void)
-{
- printf (_("Vartree:\n"));
-/*
- avl_walk_inorder (default_dict.var_by_name, dump_one_var_node, NULL);
-*/
-}
-#endif
-
/* Clear the default dictionary. Note: This is probably not what you
want to do. Use discard_variables() instead. */
void
struct variable *
find_variable (const char *name)
{
- return avl_find (default_dict.var_by_name, (struct variable *) name);
+ return hsh_find (default_dict.name_tab, name);
}
/* Find and return the variable in dictionary D having name NAME, or
struct variable *
find_dict_variable (const struct dictionary *d, const char *name)
{
- return avl_find (d->var_by_name, (struct variable *) name);
+ return hsh_find (d->name_tab, name);
}
/* Creates a variable named NAME in dictionary DICT having type TYPE
/* Avoid problems with overlap. */
strcpy (v->name, name);
- avl_force_insert (dict->var_by_name, v);
+ hsh_force_insert (dict->name_tab, v);
v->type = type;
v->left = name[0] == '#';
v->fv = dict->nval;
dict->nval += v->nv;
v->label = NULL;
- v->val_lab = NULL;
+ v->val_labs = val_labs_create (width);
v->get.fv = -1;
if (vfm_source == &input_program_source
rename_variable (struct dictionary * dict, struct variable *v,
const char *new_name)
{
- assert (dict && dict->var_by_name && v && new_name);
- avl_delete (dict->var_by_name, v);
+ assert (dict && dict->name_tab && v && new_name);
+ hsh_delete (dict->name_tab, v);
strncpy (v->name, new_name, 9);
- avl_force_insert (dict->var_by_name, v);
+ hsh_force_insert (dict->name_tab, v);
}
/* Delete the contents of variable V within dictionary DICT. Does not
void
clear_variable (struct dictionary *dict, struct variable *v)
{
- assert (dict && v);
+ assert (dict != NULL);
+ assert (v != NULL);
-#if DEBUGGING
- printf (_("clearing variable %d:%s %s\n"), v->index, v->name,
- (dict == &default_dict ? _("in default dictionary")
- : _("in auxiliary dictionary")));
- if (dict->var_by_name != NULL)
- dump_var_tree ();
-#endif
+ if (dict->name_tab != NULL)
+ hsh_force_delete (dict->name_tab, v);
- if (dict->var_by_name != NULL)
- avl_force_delete (dict->var_by_name, v);
-
- if (v->val_lab)
- {
- avl_destroy (v->val_lab, free_val_lab);
- v->val_lab = NULL;
- }
+ val_labs_clear (v->val_labs);
if (v->label)
{
dict->splits = NULL;
}
}
-
-#if DEBUGGING
- if (dict->var_by_name != NULL)
- dump_var_tree ();
-#endif
}
/* Creates a new variable in dictionary DICT, whose properties are
dict->nval += new_var->nv;
strcpy (new_var->name, name);
- avl_force_insert (dict->var_by_name, new_var);
+ hsh_force_insert (dict->name_tab, new_var);
return new_var;
}
}
-/* Decrements the reference count for value label V. Destroys the
- value label if the reference count reaches zero. */
-void
-free_value_label (struct value_label * v)
-{
- assert (v->ref_count >= 1);
- if (--v->ref_count == 0)
- {
- free (v->s);
- free (v);
- }
-}
-
-/* Frees value label P. PARAM is ignored. Used as a callback with
- avl_destroy(). */
-void
-free_val_lab (void *p, void *param unused)
-{
- free_value_label ((struct value_label *) p);
-}
-
-/* Returns a value label corresponding to VAL in variable V padded to
- length N. If N==0 then no padding is performed, and NULL is
- returned if no label exists. (Normally a string of spaces is
- returned in this case.) */
-char *
-get_val_lab (const struct variable *v, union value val, int n)
-{
- static char *buf;
- static int bufsize;
- struct value_label template, *find;
-
- if (bufsize < n)
- {
- buf = xrealloc (buf, n + 1);
- bufsize = n;
- }
- if (n)
- buf[0] = 0;
- template.v = val;
- find = NULL;
- if (v->val_lab)
- find = avl_find (v->val_lab, &template);
- if (find)
- {
- if (n)
- {
- st_pad_copy (buf, find->s, n + 1);
- return buf;
- }
- else
- return find->s;
- }
- else
- {
- if (n)
- {
- memset (buf, ' ', n);
- buf[n] = '\0';
- return buf;
- }
- else
- return NULL;
- }
-}
-
/* Return nonzero only if X is a user-missing value for numeric
variable V. */
inline int
}
abort ();
}
+\f
+/* A hsh_compare_func that orders variables A_ and B_ by
+   comparing their names with strcmp().  FOO is ignored. */
+int
+compare_variables (const void *a_, const void *b_, void *foo unused)
+{
+  const struct variable *lhs = a_;
+  const struct variable *rhs = b_;
+  int order;
+
+  order = strcmp (lhs->name, rhs->name);
+  return order;
+}
+
+/* A hsh_hash_func that hashes variable V_ by its name.  FOO is
+   ignored. */
+unsigned
+hash_variable (const void *v_, void *foo unused)
+{
+  const struct variable *var = v_;
+  unsigned hash;
+
+  hash = hsh_hash_string (var->name);
+  return hash;
+}
#include <ctype.h>
#include <stdlib.h>
#include "alloc.h"
-#include "avl.h"
#include "bitvector.h"
#include "error.h"
+#include "hash.h"
#include "lexer.h"
#include "misc.h"
#include "str.h"
int
is_varname (const char *s)
{
- return avl_find (default_dict.var_by_name, (struct variable *) s) != 0;
+ return hsh_find (default_dict.name_tab, s) != NULL;
}
int
is_dict_varname (const struct dictionary *dict, const char *s)
{
- return avl_find (dict->var_by_name, (struct variable *) s) != 0;
+ return hsh_find (dict->name_tab, s) != NULL;
}
struct variable *
return NULL;
}
- vp = avl_find (dict->var_by_name, (struct variable *) tokid);
+ vp = hsh_find (dict->name_tab, tokid);
if (!vp)
msg (SE, _("%s is not a variable name."), tokid);
lex_get ();
#include "tab.h"
#include "var.h"
#include "vector.h"
+#include "value-labels.h"
#include "vfm.h"
#include "vfmP.h"
arrange_compaction ();
make_temp_case ();
vector_initialization ();
- setup_randomize ();
discard_ctl_stack ();
setup_filter ();
setup_lag ();
{
struct variable *v = *iter;
char temp_buf[80];
- char *val_lab;
+ const char *val_lab;
assert (v->type == NUMERIC || v->type == ALPHA);
tab_text (t, 0, i + 1, TAB_LEFT | TAT_PRINTF, "%s", v->name);
temp_buf[v->print.w] = 0;
tab_text (t, 1, i + 1, TAT_PRINTF, "%.*s", v->print.w, temp_buf);
- val_lab = get_val_lab (v, c->data[v->fv], 0);
+ val_lab = val_labs_find (v->val_labs, c->data[v->fv]);
if (val_lab)
tab_text (t, 2, i + 1, TAB_LEFT, val_lab);
}