+Mon Mar 29 16:26:40 2004 Ben Pfaff <blp@gnu.org>
+
+ * debug.c: Removed. Moved cmd_debug_evaluate() into expr-evl.c.
+
+ * expr-evl.c: (cmd_debug_evaluate) Moved here from debug.c.
+
+Mon Mar 29 16:03:08 2004 Ben Pfaff <blp@gnu.org>
+
+ * algorithm.c: By default turn off some of the more expensive
+ assertions.
+ (expensive_assert) New macro which expands to assert if
+ EXTRA_CHECKS is defined, to nothing otherwise.
+ (unique) Use expensive_assert().
+ (binary_search) Ditto.
+ (push_heap) Ditto.
+ (pop_heap) Ditto.
+ (make_heap) Ditto.
+ (sort_heap) Ditto.
+
+ * command.c: (conflicting_3char_prefixes) Words that are the same
+ don't cause conflicts when they are abbreviated to the first three
+ letters.
+
+ * expr-evl.c: (CONCAT_func) Fix memory leak by incrementing struct
+ nonterm_node's n earlier.
+ (generic_str_func) Ditto.
+
+Mon Mar 29 15:32:17 2004 Ben Pfaff <blp@gnu.org>
+
+ Add support for multipass procedures. Rewrite DESCRIPTIVES to
+ test multipass support, take advantage of new moments
+ calculation, and to not be such crappy code. Get rid of q2c
+ processing for DESCRIPTIVES.
+
+ * vfm.c: (struct multipass_split_aux_data) New structure.
+ (multipass_procedure_with_splits) New function.
+ (multipass_split_callback) New function.
+ (multipass_split_output) New function.
+ * descript.q: Removed.
+
+ * descript.c: New file.
+
+ * var.h: Removed descriptives enums.
+ (struct descriptives_proc) Removed.
+ (struct variable) Removed p.dsc.
+
+ * Makefile.am: (q_sources_c) Remove descript.c.
+ (q_sources_q) Removed descript.q.
+
+Mon Mar 29 15:31:55 2004 Ben Pfaff <blp@gnu.org>
+
+ New manager for keeping track of used workspace.
+
+ * workspace.c: New file.
+
+ * workspace.h: New file.
+
+ * Makefile.am: (pspp_SOURCES) Add workspace.c, workspace.h.
+
+ * sort.c: (do_internal_sort) Use workspace_malloc().
+ (destroy_internal_sort) Use workspace_free().
+
+Mon Mar 29 15:31:08 2004 Ben Pfaff <blp@gnu.org>
+
+ New `struct casefile' for managing sets of cases.
+
+ * casefile.c: New file.
+
+ * casefile.h: New file.
+
+ * command.def: Add DEBUG CASEFILE command.
+
+ * Makefile.am: (pspp_SOURCES) Add casefile.c, casefile.h.
+
+ * sort.c: (sort_cases) Move logic for sending storage file to disk
+ into do_external_sort().
+ (struct internal_sort) Use an array of ccase pointers instead of a
+ case_list.
+ (do_internal_sort) Rewrite to handle casefiles.
+ (compare_case_list) Removed.
+ (compare_cases) New function.
+ (compare_case_dblptrs) New function.
+ (read_internal_sort_output) Deal with new struct internal_sort.
+
+ * vfm.c: (static var workspace_overflow) Removed.
+ (struct storage_stream_info) Removed all the members. Added
+ struct casefile * member.
+ (storage_sink_open) Use casefile.
+ (open_storage_file) Removed.
+ (write_storage_file) Removed.
+ (storage_to_disk) Removed.
+ (destroy_storage_stream_info) Use casefile.
+ (storage_sink_write) Use casefile.
+ (storage_sink_make_source) Use casefile.
+ (storage_source_count) Use casefile.
+ (storage_source_read) Use casefile.
+ (storage_source_on_disk) Removed.
+ (storage_source_get_cases) Removed.
+ (storage_source_set_cases) Removed.
+ (storage_source_get_casefile) New function.
+
+Mon Mar 29 15:30:09 2004 Ben Pfaff <blp@gnu.org>
+
+ New `struct moments' for calculating moments.
+
+ * stats.c: Removed.
+
+ * stats.h: Removed.
+
+ * moments.c: New file.
+
+ * moments.h: New file.
+
+ * command.def: Add DEBUG MOMENTS command.
+
+ * Makefile.am: (pspp_SOURCES) Add moments.c, moments.h. Remove
+ stats.c, stats.h.
+
+ * aggregate.c: Modify AGGREGATE to use the new moments
+ calculation, even if not in such a great way.
+ (struct agr_var) Add `moments' member.
+ (parse_aggregate_functions) Set `moments' member to null.
+ (agr_destroy) Destroy `moments' member.
+ (accumulate_aggregate_info) Use `moments' for standard deviation.
+ (dump_aggregate_info) Ditto.
+ (initialize_aggregate_info) Create or clear `moments'.
+
+ * misc.h: Add pow2(), pow3(), pow4() functions in place of sqr(),
+ cube(), pow4() that were in stats.h. All references updated.
+
+ * crosstabs.q: stats.h had chi-square significance functions. Use
+ GSL instead.
+ (display_chisq) Use gsl_cdf_chisq_Q() instead of chisq_sig().
+
+ * expr-evl.c: (expr_evaluate) Use moments_of_values() for
+ OP_CFVAR, OP_MEAN, OP_SD, OP_VARIANCE.
+
Fri Mar 26 14:21:23 2004 Ben Pfaff <blp@gnu.org>
* dictionary.c: (dict_compact_values) Compacted values need to
.q.c:
./q2c $< $@
-q_sources_c = correlations.c crosstabs.c descript.c file-handle.c \
+q_sources_c = correlations.c crosstabs.c file-handle.c \
frequencies.c list.c means.c set.c t-test.c
-q_sources_q = correlations.q crosstabs.q descript.q file-handle.q \
+q_sources_q = correlations.q crosstabs.q file-handle.q \
frequencies.q list.q means.q set.q t-test.q
-pspp_SOURCES = $(q_sources_c) \
-aggregate.c algorithm.c algorithm.h alloc.c alloc.h \
-apply-dict.c ascii.c autorecode.c bitvector.h \
-cmdline.c cmdline.h command.c command.def command.h compute.c \
-copyleft.c copyleft.h \
-count.c data-in.c data-in.h data-list.c data-list.h \
-data-out.c date.c debug.c debug-print.h devind.c devind.h dfm.c dfm.h \
-dictionary.c do-if.c do-ifP.h error.c error.h expr-evl.c expr-opt.c \
-expr-prs.c expr.h exprP.h file-handle.h file-type.c \
-filename.c filename.h flip.c font.h format.c format.def format.h \
-formats.c get.c getline.c getline.h glob.c glob.h \
-groff-font.c hash.c hash.h html.c htmlP.h include.c \
-inpt-pgm.c lexer.c lexer.h levene.c levene.h \
-log.h loop.c magic.c magic.h main.c \
-main.h matrix-data.c matrix.c matrix.h mis-val.c misc.c misc.h \
-modify-vars.c numeric.c output.c output.h pfm-read.c pfm-write.c pfm.h \
-pool.c pool.h postscript.c print.c random.c random.h recode.c \
-rename-vars.c repeat.c repeat.h sample.c sel-if.c settings.h \
+pspp_SOURCES = $(q_sources_c) aggregate.c algorithm.c algorithm.h \
+alloc.c alloc.h apply-dict.c ascii.c autorecode.c bitvector.h \
+casefile.c casefile.h cmdline.c cmdline.h command.c command.def \
+command.h compute.c copyleft.c copyleft.h count.c data-in.c data-in.h \
+data-list.c data-list.h data-out.c date.c debug-print.h descript.c \
+devind.c devind.h dfm.c dfm.h dictionary.c do-if.c do-ifP.h error.c \
+error.h expr-evl.c expr-opt.c expr-prs.c expr.h exprP.h file-handle.h \
+file-type.c filename.c filename.h flip.c font.h format.c format.def \
+format.h formats.c get.c getline.c getline.h glob.c glob.h \
+groff-font.c hash.c hash.h html.c htmlP.h include.c inpt-pgm.c lexer.c \
+lexer.h levene.c levene.h log.h loop.c magic.c magic.h main.c main.h \
+matrix-data.c matrix.c matrix.h mis-val.c misc.c misc.h modify-vars.c \
+moments.c moments.h numeric.c output.c output.h pfm-read.c pfm-write.c \
+pfm.h pool.c pool.h postscript.c print.c random.c random.h recode.c \
+rename-vars.c repeat.c repeat.h sample.c sel-if.c settings.h \
sfm-read.c sfm-write.c sfm.h sfmP.h som.c som.h sort.c sort.h \
-split-file.c stat.h stats.c stats.h str.c str.h sysfile-info.c tab.c \
-tab.h temporary.c title.c t-test.h val.h val-labs.c value-labels.c \
-value-labels.h var-labs.c var.h vars-atr.c vars-prs.c vector.c \
-version.c version.h vfm.c vfm.h vfmP.h weight.c
+split-file.c str.c str.h sysfile-info.c tab.c tab.h temporary.c \
+title.c t-test.h val.h val-labs.c value-labels.c value-labels.h \
+var-labs.c var.h vars-atr.c vars-prs.c vector.c version.c version.h \
+vfm.c vfm.h vfmP.h weight.c workspace.c workspace.h
-pspp_LDADD = ../lib/julcal/libjulcal.a \
+pspp_LDADD = ../lib/julcal/libjulcal.a \
../lib/misc/libmisc.a \
@LIBINTL@
#include "file-handle.h"
#include "lexer.h"
#include "misc.h"
+#include "moments.h"
#include "pool.h"
#include "settings.h"
#include "sfm.h"
#include "sort.h"
-#include "stats.h"
#include "str.h"
#include "var.h"
#include "vfm.h"
int int1, int2;
char *string;
int missing;
+ struct moments *moments;
};
/* Aggregation functions. */
agr->vars = v;
tail = v;
tail->next = NULL;
+ v->moments = NULL;
/* Create the target variable in the aggregate
dictionary. */
free (iter->arg[i].c);
free (iter->string);
}
+ else if (iter->function == SD)
+ moments_destroy (iter->moments);
free (iter);
}
free (agr->prev_break);
iter->dbl[0] += v->f * weight;
iter->dbl[1] += weight;
break;
- case SD:
- {
- double product = v->f * weight;
- iter->dbl[0] += product;
- iter->dbl[1] += product * v->f;
- iter->dbl[2] += weight;
- break;
- }
+ case SD:
+ moments_pass_two (iter->moments, v->f, weight);
+ break;
case MAX:
iter->dbl[0] = max (iter->dbl[0], v->f);
iter->int1 = 1;
v->f = i->dbl[1] != 0.0 ? i->dbl[0] / i->dbl[1] : SYSMIS;
break;
case SD:
- v->f = ((i->dbl[2] > 1.0)
- ? calc_stddev (calc_variance (i->dbl, i->dbl[2]))
- : SYSMIS);
+ {
+ double variance;
+
+ /* FIXME: we should use two passes. */
+ moments_calculate (i->moments, NULL, NULL, &variance,
+ NULL, NULL);
+ if (variance != SYSMIS)
+ v->f = sqrt (variance);
+ else
+ v->f = SYSMIS;
+ }
break;
case MAX:
case MIN:
case MAX | FSTRING:
memset (iter->string, 0, iter->src->width);
break;
+ case SD:
+ if (iter->moments == NULL)
+ iter->moments = moments_create (MOMENT_VARIANCE);
+ else
+ moments_clear (iter->moments);
+ break;
default:
iter->dbl[0] = iter->dbl[1] = iter->dbl[2] = 0.0;
iter->int1 = iter->int2 = 0;
#include "alloc.h"
#include "random.h"
-/* Some of the assertions in this file are very expensive. If
- we're optimizing, don't include them. */
-#if __OPTIMIZE__
-#define NDEBUG
+/* Some of the assertions in this file are very expensive. We
+ don't use them by default. */
+#ifdef EXTRA_CHECKS
+#define expensive_assert(X) assert(X)
+#else
+#define expensive_assert(X) ((void) 0)
#endif
#include "error.h"
\f
first += size;
if (first >= last)
{
- assert (adjacent_find_equal (array, count, size, compare, aux) == NULL);
+ assert (adjacent_find_equal (array, count,
+ size, compare, aux) == NULL);
return count;
}
}
}
- assert (find (array, count, size, value, compare, aux) == NULL);
+ expensive_assert (find (array, count, size, value, compare, aux) == NULL);
return NULL;
}
\f
unsigned char *first = array;
size_t i;
- assert (count < 1 || is_heap (array, count - 1, size, compare, aux));
+ expensive_assert (count < 1 || is_heap (array, count - 1,
+ size, compare, aux));
for (i = count; i > 1; i /= 2)
{
unsigned char *parent = first + (i / 2 - 1) * size;
else
break;
}
- assert (is_heap (array, count, size, compare, aux));
+ expensive_assert (is_heap (array, count, size, compare, aux));
}
/* ARRAY contains COUNT elements of SIZE bytes each. Initially
{
unsigned char *first = array;
- assert (is_heap (array, count, size, compare, aux));
+ expensive_assert (is_heap (array, count, size, compare, aux));
SWAP (first, first + (count - 1) * size, size);
heapify (first, count - 1, size, 1, compare, aux);
- assert (count < 1 || is_heap (array, count - 1, size, compare, aux));
+ expensive_assert (count < 1 || is_heap (array, count - 1,
+ size, compare, aux));
}
/* Turns ARRAY, which contains COUNT elements of SIZE bytes, into
for (idx = count / 2; idx >= 1; idx--)
heapify (array, count, size, idx, compare, aux);
- assert (count < 1 || is_heap (array, count, size, compare, aux));
+ expensive_assert (count < 1 || is_heap (array, count, size, compare, aux));
}
/* ARRAY contains COUNT elements of SIZE bytes each. Initially
unsigned char *first = array;
size_t idx;
- assert (is_heap (array, count, size, compare, aux));
+ expensive_assert (is_heap (array, count, size, compare, aux));
for (idx = count; idx >= 2; idx--)
{
SWAP (first, first + (idx - 1) * size, size);
heapify (array, idx - 1, size, 1, compare, aux);
}
- assert (is_sorted (array, count, size, compare, aux));
+ expensive_assert (is_sorted (array, count, size, compare, aux));
}
/* ARRAY contains COUNT elements of SIZE bytes each. This
(((unsigned char *) X)[(Y) / CHAR_BIT] & (1 << ((Y) % CHAR_BIT)))
/* Returns 2**X, 0 <= X < 32. */
-#define BIT_INDEX(X) \
- (1ul << (X))
+#define BIT_INDEX(X) (1ul << (X))
#endif /* bitvector.h */
--- /dev/null
+/* PSPP - computes sample statistics.
+ Copyright (C) 2004 Free Software Foundation, Inc.
+ Written by Ben Pfaff <blp@gnu.org>.
+
+ This program is free software; you can redistribute it and/or
+ modify it under the terms of the GNU General Public License as
+ published by the Free Software Foundation; either version 2 of the
+ License, or (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful, but
+ WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program; if not, write to the Free Software
+ Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA
+ 02111-1307, USA. */
+
+#include <config.h>
+#include "casefile.h"
+#include <assert.h>
+#include <errno.h>
+#include <fcntl.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <unistd.h>
+#include "alloc.h"
+#include "error.h"
+#include "misc.h"
+#include "var.h"
+#include "workspace.h"
+
+#define IO_BUF_SIZE 8192
+
+/* A casefile is a sequentially accessible array of immutable
+ cases. It may be stored in memory or on disk as workspace
+ allows. Cases may be appended to the end of the file. Cases
+ may be read sequentially starting from the beginning of the
+ file. Once any cases have been read, no more cases may be
+ appended. The entire file is discarded at once. */
+
+/* A casefile. */
+struct casefile
+ {
+ /* Basic data. */
+ struct casefile *next, *prev; /* Next, prev in global list. */
+ size_t case_size; /* Case size in bytes. */
+ size_t case_list_size; /* Bytes to allocate for case_lists. */
+ unsigned long case_cnt; /* Number of cases stored. */
+ enum { MEMORY, DISK } storage; /* Where cases are stored. */
+ enum { WRITE, READ } mode; /* Is writing or reading allowed? */
+ struct casereader *readers; /* List of our readers. */
+
+ /* Memory storage. */
+ struct case_list *head, *tail; /* Beginning, end of list of cases. */
+
+ /* Disk storage. */
+ int fd; /* File descriptor, -1 if none. */
+ char *filename; /* Filename. */
+ unsigned char *buffer; /* I/O buffer, NULL if none. */
+ size_t buffer_used; /* Number of bytes used in buffer. */
+ size_t buffer_size; /* Buffer size in bytes. */
+ };
+
+/* For reading out the casing in a casefile. */
+struct casereader
+ {
+ struct casereader *next, *prev; /* Next, prev in casefile's list. */
+ struct casefile *cf; /* Our casefile. */
+ unsigned long case_idx; /* Case number of current case. */
+
+ /* Memory storage. */
+ struct case_list *cur; /* Current case. */
+
+ /* Disk storage. */
+ int fd; /* File descriptor. */
+ unsigned char *buffer; /* I/O buffer. */
+ size_t buffer_pos; /* Byte offset of buffer position. */
+ };
+
+struct casefile *casefiles;
+
+static void register_atexit (void);
+static void exit_handler (void);
+
+static void reader_open_file (struct casereader *reader);
+static void write_case_to_disk (struct casefile *cf, const struct ccase *c);
+static void flush_buffer (struct casefile *cf);
+static void fill_buffer (struct casereader *reader);
+
+static int safe_open (const char *filename, int flags);
+static int safe_close (int fd);
+static int full_read (int fd, char *buffer, size_t size);
+static int full_write (int fd, const char *buffer, size_t size);
+
+/* Creates and returns a casefile to store cases of CASE_SIZE bytes each. */
+struct casefile *
+casefile_create (size_t case_size)
+{
+ struct casefile *cf = xmalloc (sizeof *cf);
+ cf->next = casefiles;
+ cf->prev = NULL;
+ if (cf->next != NULL)
+ cf->next->prev = cf;
+ casefiles = cf;
+ cf->case_size = case_size;
+ cf->case_list_size = sizeof *cf->head + case_size - sizeof *cf->head->c.data;
+ cf->case_cnt = 0;
+ cf->storage = MEMORY;
+ cf->mode = WRITE;
+ cf->readers = NULL;
+ cf->head = cf->tail = NULL;
+ cf->fd = -1;
+ cf->filename = NULL;
+ cf->buffer = NULL;
+ cf->buffer_size = ROUND_UP (case_size, IO_BUF_SIZE);
+ if (case_size > 0 && cf->buffer_size % case_size > 512)
+ cf->buffer_size = case_size;
+ cf->buffer_used = 0;
+ register_atexit ();
+ return cf;
+}
+
+/* Destroys casefile CF. */
+void
+casefile_destroy (struct casefile *cf)
+{
+ if (cf != NULL)
+ {
+ struct case_list *iter, *next;
+
+ if (cf->next != NULL)
+ cf->next->prev = cf->prev;
+ if (cf->prev != NULL)
+ cf->prev->next = cf->next;
+ if (casefiles == cf)
+ casefiles = cf->next;
+
+ while (cf->readers != NULL)
+ casereader_destroy (cf->readers);
+
+ for (iter = cf->head; iter != NULL; iter = next)
+ {
+ next = iter->next;
+ workspace_free (iter, cf->case_list_size);
+ }
+
+ if (cf->fd != -1)
+ safe_close (cf->fd);
+
+ if (cf->filename != NULL && remove (cf->filename) == -1)
+ msg (ME, _("%s: Removing temporary file: %s."),
+ cf->filename, strerror (errno));
+
+ free (cf->buffer);
+
+ free (cf);
+ }
+}
+
+/* Returns nonzero only if casefile CF is stored in memory (instead of on
+ disk). */
+int
+casefile_in_core (const struct casefile *cf)
+{
+ assert (cf != NULL);
+
+ return cf->storage == MEMORY;
+}
+
+/* Returns the number of bytes in a case for CF. */
+size_t
+casefile_get_case_size (const struct casefile *cf)
+{
+ assert (cf != NULL);
+
+ return cf->case_size;
+}
+
+/* Returns the number of cases in casefile CF. */
+unsigned long
+casefile_get_case_cnt (const struct casefile *cf)
+{
+ assert (cf != NULL);
+
+ return cf->case_cnt;
+}
+
+/* Appends case C to casefile CF. Not valid after any reader for CF has been
+ created. */
+void
+casefile_append (struct casefile *cf, const struct ccase *c)
+{
+ assert (cf != NULL);
+ assert (c != NULL);
+ assert (cf->mode == WRITE);
+
+ cf->case_cnt++;
+
+ /* Try memory first. */
+ if (cf->storage == MEMORY)
+ {
+ struct case_list *new_case;
+
+ new_case = workspace_malloc (cf->case_list_size);
+ if (new_case != NULL)
+ {
+ memcpy (&new_case->c, c, cf->case_size);
+ new_case->next = NULL;
+ if (cf->head != NULL)
+ cf->tail->next = new_case;
+ else
+ cf->head = new_case;
+ cf->tail = new_case;
+ }
+ else
+ {
+ casefile_to_disk (cf);
+ assert (cf->storage == DISK);
+ write_case_to_disk (cf, c);
+ }
+ }
+ else
+ write_case_to_disk (cf, c);
+}
+
+/* Writes case C to casefile CF's disk buffer, first flushing the buffer to
+ disk if it would otherwise overflow. */
+static void
+write_case_to_disk (struct casefile *cf, const struct ccase *c)
+{
+ memcpy (cf->buffer + cf->buffer_used, c->data, cf->case_size);
+ cf->buffer_used += cf->case_size;
+ if (cf->buffer_used + cf->case_size > cf->buffer_size)
+ flush_buffer (cf);
+
+}
+
+static void
+flush_buffer (struct casefile *cf)
+{
+ if (cf->buffer_used > 0)
+ {
+ if (!full_write (cf->fd, cf->buffer, cf->buffer_size))
+ msg (FE, _("Error writing temporary file: %s."), strerror (errno));
+
+ cf->buffer_used = 0;
+ }
+}
+
+/* Creates a temporary file and stores its name in *FILENAME and
+ a file descriptor for it in *FD. Returns success. Caller is
+ responsible for freeing *FILENAME. */
+static int
+make_temp_file (int *fd, char **filename)
+{
+ const char *parent_dir;
+
+ assert (filename != NULL);
+ assert (fd != NULL);
+
+ if (getenv ("TMPDIR") != NULL)
+ parent_dir = getenv ("TMPDIR");
+ else
+ parent_dir = P_tmpdir;
+
+ *filename = xmalloc (strlen (parent_dir) + 32);
+ sprintf (*filename, "%s%cpsppXXXXXX", parent_dir, DIR_SEPARATOR);
+ *fd = mkstemp (*filename);
+ if (*fd < 0)
+ {
+ msg (FE, _("%s: Creating temporary file: %s."),
+ *filename, strerror (errno));
+ free (*filename);
+ *filename = NULL;
+ return 0;
+ }
+ return 1;
+}
+
+/* If CF is currently stored in memory, writes it to disk. Readers, if any,
+ retain their current positions. */
+void
+casefile_to_disk (struct casefile *cf)
+{
+ struct case_list *iter, *next;
+ struct casereader *reader;
+
+ assert (cf != NULL);
+
+ if (cf->storage == MEMORY)
+ {
+ assert (cf->filename == NULL);
+ assert (cf->fd == -1);
+ assert (cf->buffer_used == 0);
+
+ cf->storage = DISK;
+ if (!make_temp_file (&cf->fd, &cf->filename))
+ err_failure ();
+#if HAVE_POSIX_FADVISE
+ posix_fadvise (cf->fd, 0, 0, POSIX_FADV_SEQUENTIAL);
+#endif
+ cf->buffer = xmalloc (cf->buffer_size);
+ memset (cf->buffer, 0, cf->buffer_size);
+
+ for (iter = cf->head; iter != NULL; iter = next)
+ {
+ next = iter->next;
+ write_case_to_disk (cf, &iter->c);
+ workspace_free (iter, cf->case_list_size);
+ }
+ flush_buffer (cf);
+ cf->head = cf->tail = NULL;
+
+ for (reader = cf->readers; reader != NULL; reader = reader->next)
+ reader_open_file (reader);
+ }
+}
+
+/* Merges lists A and B into a single list, which is returned. Cases are
+ compared according to comparison function COMPARE, which receives auxiliary
+ data AUX. */
+static struct case_list *
+merge (struct case_list *a, struct case_list *b,
+ int (*compare) (const struct ccase *,
+ const struct ccase *, void *aux),
+ void *aux)
+{
+ struct case_list head;
+ struct case_list *tail = &head;
+
+ while (a != NULL && b != NULL)
+ if (compare (&a->c, &b->c, aux) < 0)
+ {
+ tail->next = a;
+ tail = a;
+ a = a->next;
+ }
+ else
+ {
+ tail->next = b;
+ tail = b;
+ b = b->next;
+ }
+
+ tail->next = a == NULL ? b : a;
+
+ return head.next;
+}
+
+/* Sorts the list beginning at FIRST, returning the new first case. Cases are
+ compared according to comparison function COMPARE, which receives auxiliary
+ data AUX. */
+static struct case_list *
+merge_sort (struct case_list *first,
+ int (*compare) (const struct ccase *,
+ const struct ccase *, void *aux),
+ void *aux)
+{
+ /* FIXME: we should use a "natural" merge sort to take
+ advantage of the natural order of the data. */
+ struct case_list *middle, *last, *tmp;
+
+ /* A list of zero or one elements is already sorted. */
+ if (first == NULL || first->next == NULL)
+ return first;
+
+ middle = first;
+ last = first->next;
+ while (last != NULL && last->next != NULL)
+ {
+ middle = middle->next;
+ last = last->next->next;
+ }
+ tmp = middle;
+ middle = middle->next;
+ tmp->next = NULL;
+ return merge (merge_sort (first, compare, aux),
+ merge_sort (middle, compare, aux),
+ compare, aux);
+}
+
+/* Tries to sort casefile CF according to comparison function
+ COMPARE, which is passes auxiliary data AUX. If successful,
+ returns nonzero. Currently only sorting of in-memory
+ casefiles is implemented. */
+int
+casefile_sort (struct casefile *cf,
+ int (*compare) (const struct ccase *,
+ const struct ccase *, void *aux),
+ void *aux)
+{
+ assert (cf != NULL);
+ assert (compare != NULL);
+
+ cf->mode = WRITE;
+
+ if (cf->case_cnt < 2)
+ return 1;
+ else if (cf->storage == DISK)
+ return 0;
+ else
+ {
+ cf->head = cf->tail = merge_sort (cf->head, compare, aux);
+ while (cf->tail->next != NULL)
+ cf->tail = cf->tail->next;
+
+ return 1;
+ }
+}
+
+/* Creates and returns a casereader for CF. A casereader can be used to
+ sequentially read the cases in a casefile. */
+struct casereader *
+casefile_get_reader (const struct casefile *cf_)
+{
+ struct casefile *cf = (struct casefile *) cf_;
+ struct casereader *reader;
+
+ /* Flush the buffer to disk if it's not empty. */
+ if (cf->mode == WRITE && cf->storage == DISK)
+ flush_buffer (cf);
+
+ cf->mode = READ;
+
+ reader = xmalloc (sizeof *reader);
+ reader->cf = cf;
+ reader->next = cf->readers;
+ if (cf->readers != NULL)
+ reader->next->prev = reader;
+ reader->prev = NULL;
+ cf->readers = reader;
+ reader->case_idx = 0;
+ reader->cur = NULL;
+ reader->fd = -1;
+ reader->buffer = NULL;
+ reader->buffer_pos = 0;
+
+ if (reader->cf->storage == MEMORY)
+ reader->cur = cf->head;
+ else
+ reader_open_file (reader);
+
+ return reader;
+}
+
+/* Opens a disk file for READER and seeks to the current position as indicated
+ by case_idx. Normally the current position is the beginning of the file,
+ but casefile_to_disk may cause the file to be opened at a different
+ position. */
+static void
+reader_open_file (struct casereader *reader)
+{
+ size_t buffer_case_cnt;
+ off_t file_ofs;
+
+ if (reader->case_idx >= reader->cf->case_cnt)
+ return;
+
+ if (reader->cf->fd != -1)
+ {
+ reader->fd = reader->cf->fd;
+ reader->cf->fd = -1;
+ }
+ else
+ {
+ reader->fd = safe_open (reader->cf->filename, O_RDONLY);
+ if (reader->fd < 0)
+ msg (FE, _("%s: Opening temporary file: %s."),
+ reader->cf->filename, strerror (errno));
+ }
+
+ if (reader->cf->buffer != NULL)
+ {
+ reader->buffer = reader->cf->buffer;
+ reader->cf->buffer = NULL;
+ }
+ else
+ {
+ reader->buffer = xmalloc (reader->cf->buffer_size);
+ memset (reader->buffer, 0, reader->cf->buffer_size);
+ }
+
+ if (reader->cf->case_size != 0)
+ {
+ buffer_case_cnt = reader->cf->buffer_size / reader->cf->case_size;
+ file_ofs = ((off_t) reader->case_idx
+ / buffer_case_cnt * reader->cf->buffer_size);
+ reader->buffer_pos = (reader->case_idx % buffer_case_cnt
+ * reader->cf->case_size);
+ }
+ else
+ file_ofs = 0;
+#if HAVE_POSIX_FADVISE
+ posix_fadvise (reader->fd, file_ofs, 0, POSIX_FADV_SEQUENTIAL);
+#endif
+ if (lseek (reader->fd, file_ofs, SEEK_SET) != file_ofs)
+ msg (FE, _("%s: Seeking temporary file: %s."),
+ reader->cf->filename, strerror (errno));
+
+ if (reader->cf->case_cnt > 0 && reader->cf->case_size > 0)
+ fill_buffer (reader);
+}
+
+/* Fills READER's buffer by reading a block from disk. */
+static void
+fill_buffer (struct casereader *reader)
+{
+ int retval = full_read (reader->fd, reader->buffer, reader->cf->buffer_size);
+ if (retval < 0)
+ msg (FE, _("%s: Reading temporary file: %s."),
+ reader->cf->filename, strerror (errno));
+ else if (retval != reader->cf->buffer_size)
+ msg (FE, _("%s: Temporary file ended unexpectedly."),
+ reader->cf->filename);
+}
+
+int
+casereader_read (struct casereader *reader, const struct ccase **c)
+{
+ assert (reader != NULL);
+
+ if (reader->case_idx >= reader->cf->case_cnt)
+ {
+ *c = NULL;
+ return 0;
+ }
+
+ reader->case_idx++;
+ if (reader->cf->storage == MEMORY)
+ {
+ assert (reader->cur != NULL);
+ *c = &reader->cur->c;
+ reader->cur = reader->cur->next;
+ return 1;
+ }
+ else
+ {
+ if (reader->buffer_pos + reader->cf->case_size > reader->cf->buffer_size)
+ {
+ fill_buffer (reader);
+ reader->buffer_pos = 0;
+ }
+
+ *c = (struct ccase *) (reader->buffer + reader->buffer_pos);
+ reader->buffer_pos += reader->cf->case_size;
+ return 1;
+ }
+}
+
+void
+casereader_destroy (struct casereader *reader)
+{
+ assert (reader != NULL);
+
+ if (reader->next != NULL)
+ reader->next->prev = reader->prev;
+ if (reader->prev != NULL)
+ reader->prev->next = reader->next;
+ if (reader->cf->readers == reader)
+ reader->cf->readers = reader->next;
+
+ if (reader->cf->buffer == NULL)
+ reader->cf->buffer = reader->buffer;
+ else
+ free (reader->buffer);
+
+ if (reader->cf->fd == -1)
+ reader->cf->fd = reader->fd;
+ else
+ safe_close (reader->fd);
+
+ free (reader);
+}
+
+static int
+safe_open (const char *filename, int flags)
+{
+ int fd;
+
+ do
+ {
+ fd = open (filename, flags);
+ }
+ while (fd == -1 && errno == EINTR);
+
+ return fd;
+}
+
+static int safe_close (int fd)
+{
+ int retval;
+
+ do
+ {
+ retval = close (fd);
+ }
+ while (retval == -1 && errno == EINTR);
+
+ return retval;
+}
+
+static int
+full_read (int fd, char *buffer, size_t size)
+{
+ size_t bytes_read = 0;
+
+ while (bytes_read < size)
+ {
+ int retval = read (fd, buffer + bytes_read, size - bytes_read);
+ if (retval > 0)
+ bytes_read += retval;
+ else if (retval == 0)
+ return bytes_read;
+ else if (errno != EINTR)
+ return -1;
+ }
+
+ return bytes_read;
+}
+
+static int
+full_write (int fd, const char *buffer, size_t size)
+{
+ size_t bytes_written = 0;
+
+ while (bytes_written < size)
+ {
+ int retval = write (fd, buffer + bytes_written, size - bytes_written);
+ if (retval >= 0)
+ bytes_written += retval;
+ else if (errno != EINTR)
+ return -1;
+ }
+
+ return bytes_written;
+}
+
+static void
+register_atexit (void)
+{
+ static int registered = 0;
+ if (!registered)
+ {
+ registered = 1;
+ atexit (exit_handler);
+ }
+}
+
+static void
+exit_handler (void)
+{
+ while (casefiles != NULL)
+ casefile_destroy (casefiles);
+}
+\f
+#include <stdarg.h>
+#include "command.h"
+#include "random.h"
+#include "lexer.h"
+
+static void test_casefile (int pattern, size_t case_size, size_t case_cnt);
+static struct ccase *get_random_case (size_t case_size, size_t case_idx);
+static void write_random_case (struct casefile *cf, size_t case_idx);
+static void read_and_verify_random_case (struct casefile *cf,
+ struct casereader *reader,
+ size_t case_idx);
+static void fail_test (const char *message, ...);
+
+int
+cmd_debug_casefile (void)
+{
+ static const size_t sizes[] =
+ {
+ 1, 2, 3, 4, 5, 6, 7, 14, 15, 16, 17, 31, 55, 73,
+ 100, 137, 257, 521, 1031, 2053
+ };
+ int size_max;
+ int case_max;
+ int pattern;
+
+ size_max = sizeof sizes / sizeof *sizes;
+ if (lex_match_id ("SMALL"))
+ {
+ size_max -= 4;
+ case_max = 511;
+ }
+ else
+ case_max = 4095;
+ if (token != '.')
+ return lex_end_of_command ();
+
+ for (pattern = 0; pattern < 5; pattern++)
+ {
+ const size_t *size;
+
+ for (size = sizes; size < sizes + size_max; size++)
+ {
+ size_t case_cnt;
+
+ for (case_cnt = 0; case_cnt <= case_max;
+ case_cnt = (case_cnt * 2) + 1)
+ test_casefile (pattern, *size * sizeof (union value), case_cnt);
+ }
+ }
+ printf ("Casefile tests succeeded.\n");
+ return CMD_SUCCESS;
+}
+
+static void
+test_casefile (int pattern, size_t case_size, size_t case_cnt)
+{
+ int zero = 0;
+ struct casefile *cf;
+ struct casereader *r1, *r2;
+ const struct ccase *c;
+ struct rng *rng;
+ size_t i, j;
+
+ rng = rng_create ();
+ rng_seed (rng, &zero, sizeof zero);
+ cf = casefile_create (case_size);
+ for (i = 0; i < case_cnt; i++)
+ write_random_case (cf, i);
+ r1 = casefile_get_reader (cf);
+ r2 = casefile_get_reader (cf);
+ switch (pattern)
+ {
+ case 0:
+ for (i = 0; i < case_cnt; i++)
+ {
+ read_and_verify_random_case (cf, r1, i);
+ read_and_verify_random_case (cf, r2, i);
+ }
+ break;
+ case 1:
+ for (i = 0; i < case_cnt; i++)
+ read_and_verify_random_case (cf, r1, i);
+ for (i = 0; i < case_cnt; i++)
+ read_and_verify_random_case (cf, r2, i);
+ break;
+ case 2:
+ case 3:
+ case 4:
+ for (i = j = 0; i < case_cnt; i++)
+ {
+ read_and_verify_random_case (cf, r1, i);
+ if (rng_get_int (rng) % pattern == 0)
+ read_and_verify_random_case (cf, r2, j++);
+ if (i == case_cnt / 2)
+ casefile_to_disk (cf);
+ }
+ for (; j < case_cnt; j++)
+ read_and_verify_random_case (cf, r2, j);
+ break;
+ }
+ if (casereader_read (r1, &c))
+ fail_test ("Casereader 1 not at end of file.");
+ if (casereader_read (r2, &c))
+ fail_test ("Casereader 2 not at end of file.");
+ if (pattern != 1)
+ casereader_destroy (r1);
+ if (pattern != 2)
+ casereader_destroy (r2);
+ casefile_destroy (cf);
+ rng_destroy (rng);
+}
+
+static struct ccase *
+get_random_case (size_t case_size, size_t case_idx)
+{
+ struct ccase *c = xmalloc (case_size);
+ memset (c, case_idx % 257, case_size);
+ return c;
+}
+
+static void
+write_random_case (struct casefile *cf, size_t case_idx)
+{
+ struct ccase *c = get_random_case (casefile_get_case_size (cf), case_idx);
+ casefile_append (cf, c);
+ free (c);
+}
+
+static void
+read_and_verify_random_case (struct casefile *cf,
+ struct casereader *reader, size_t case_idx)
+{
+ const struct ccase *read_case;
+ struct ccase *expected_case;
+ size_t case_size;
+
+ case_size = casefile_get_case_size (cf);
+ expected_case = get_random_case (case_size, case_idx);
+ if (!casereader_read (reader, &read_case))
+ fail_test ("Premature end of casefile.");
+ if (memcmp (read_case, expected_case, case_size))
+ fail_test ("Case %lu fails comparison.", (unsigned long) case_idx);
+ free (expected_case);
+}
+
+static void
+fail_test (const char *message, ...)
+{
+ va_list args;
+
+ va_start (args, message);
+ vprintf (message, args);
+ putchar ('\n');
+ va_end (args);
+
+ exit (1);
+}
--- /dev/null
+/* PSPP - computes sample statistics.
+ Copyright (C) 2004 Free Software Foundation, Inc.
+ Written by Ben Pfaff <blp@gnu.org>.
+
+ This program is free software; you can redistribute it and/or
+ modify it under the terms of the GNU General Public License as
+ published by the Free Software Foundation; either version 2 of the
+ License, or (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful, but
+ WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program; if not, write to the Free Software
+ Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA
+ 02111-1307, USA. */
+
+#ifndef HEADER_CASEFILE
+#define HEADER_CASEFILE
+
+#include <stddef.h>
+
+struct ccase;
+struct casefile;
+struct casereader;
+
+struct casefile *casefile_create (size_t case_size);
+void casefile_destroy (struct casefile *);
+
+int casefile_in_core (const struct casefile *);
+size_t casefile_get_case_size (const struct casefile *);
+unsigned long casefile_get_case_cnt (const struct casefile *);
+
+void casefile_append (struct casefile *, const struct ccase *);
+void casefile_to_disk (struct casefile *);
+
+int casefile_sort (struct casefile *,
+ int (*compare) (const struct ccase *,
+ const struct ccase *, void *aux),
+ void *aux);
+
+struct casereader *casefile_get_reader (const struct casefile *);
+int casereader_read (struct casereader *, const struct ccase **);
+void casereader_destroy (struct casereader *);
+
+#endif /* casefile.h */
bw = find_word (b, &bw_len);
assert (aw != NULL && bw != NULL);
+ /* Words that are the same don't conflict. */
+ if (aw_len == bw_len && !memcmp (aw, bw, aw_len))
+ return 0;
+
+ /* Words that are otherwise the same in the first three letters
+ do conflict. */
return ((aw_len > 3 && bw_len > 3)
|| (aw_len == 3 && bw_len > 3)
|| (bw_len == 3 && aw_len > 3)) && !memcmp (aw, bw, 3);
DEFCMD ("COUNT", ERRO, INPU, TRAN, TRAN, cmd_count)
DEFCMD ("CROSSTABS", ERRO, ERRO, PROC, PROC, cmd_crosstabs)
DEFCMD ("DATA LIST", TRAN, INPU, TRAN, TRAN, cmd_data_list)
+DEFCMD ("DEBUG CASEFILE", INIT, INPU, TRAN, PROC, cmd_debug_casefile)
DEFCMD ("DEBUG EVALUATE", INIT, INPU, TRAN, PROC, cmd_debug_evaluate)
+DEFCMD ("DEBUG MOMENTS", INIT, INPU, TRAN, PROC, cmd_debug_moments)
DEFCMD ("DESCRIPTIVES", ERRO, ERRO, PROC, PROC, cmd_descriptives)
DEFCMD ("DISPLAY", ERRO, INPU, TRAN, PROC, cmd_display)
DEFCMD ("DO IF", ERRO, INPU, TRAN, TRAN, cmd_do_if)
#include <ctype.h>
#include <stdlib.h>
#include <stdio.h>
+#include <gsl/gsl_cdf.h>
#include "algorithm.h"
#include "alloc.h"
#include "hash.h"
#include "error.h"
#include "magic.h"
#include "misc.h"
-#include "stats.h"
#include "output.h"
#include "tab.h"
#include "value-labels.h"
tab_float (chisq, 1, 0, TAB_RIGHT, chisq_v[i], 8, 3);
tab_float (chisq, 2, 0, TAB_RIGHT, df[i], 8, 0);
tab_float (chisq, 3, 0, TAB_RIGHT,
- chisq_sig (chisq_v[i], df[i]), 8, 3);
+ gsl_cdf_chisq_Q (chisq_v[i], df[i]), 8, 3);
}
else
{
if (cmd.a_statistics[CRS_ST_D])
{
- d_yx_cum += fij * sqr (Dr * (Cij - Dij)
- - (P - Q) * (W - row_tot[i]));
- d_xy_cum += fij * sqr (Dc * (Dij - Cij)
- - (Q - P) * (W - col_tot[j]));
+ d_yx_cum += fij * pow2 (Dr * (Cij - Dij)
+ - (P - Q) * (W - row_tot[i]));
+ d_xy_cum += fij * pow2 (Dc * (Dij - Cij)
+ - (Q - P) * (W - col_tot[j]));
}
if (++j == n_cols)
}
btau_var = ((btau_cum
- - (W * sqr (W * (P - Q) / sqrt (Dr * Dc) * (Dr + Dc))))
- / sqr (Dr * Dc));
+ - (W * pow2 (W * (P - Q) / sqrt (Dr * Dc) * (Dr + Dc))))
+ / pow2 (Dr * Dc));
if (cmd.a_statistics[CRS_ST_BTAU])
{
ase[3] = sqrt (btau_var);
somers_d_ase[0] = 2. * btau_var / (Dr + Dc) * sqrt (Dr * Dc);
somers_d_t[0] = (somers_d_v[0]
/ (4 / (Dc + Dr)
- * sqrt (ctau_cum - sqr (P - Q) / W)));
+ * sqrt (ctau_cum - pow2 (P - Q) / W)));
somers_d_v[1] = (P - Q) / Dc;
- somers_d_ase[1] = 2. / sqr (Dc) * sqrt (d_xy_cum);
+ somers_d_ase[1] = 2. / pow2 (Dc) * sqrt (d_xy_cum);
somers_d_t[1] = (somers_d_v[1]
/ (2. / Dc
- * sqrt (ctau_cum - sqr (P - Q) / W)));
+ * sqrt (ctau_cum - pow2 (P - Q) / W)));
somers_d_v[2] = (P - Q) / Dr;
- somers_d_ase[2] = 2. / sqr (Dr) * sqrt (d_yx_cum);
+ somers_d_ase[2] = 2. / pow2 (Dr) * sqrt (d_yx_cum);
somers_d_t[2] = (somers_d_v[2]
/ (2. / Dr
- * sqrt (ctau_cum - sqr (P - Q) / W)));
+ * sqrt (ctau_cum - pow2 (P - Q) / W)));
}
free (cum);
/ (W * (W * W - sum_rici) * (W * W - sum_rici)));
#if 0
t[8] = v[8] / sqrt (W * (((sum_fii * (W - sum_fii))
- / sqr (W * W - sum_rici))
+ / pow2 (W * W - sum_rici))
+ ((2. * (W - sum_fii)
* (2. * sum_fii * sum_rici
- W * sum_fiiri_ci))
/ cube (W * W - sum_rici))
- + (sqr (W - sum_fii)
+ + (pow2 (W - sum_fii)
* (W * sum_fijri_ci2 - 4.
* sum_rici * sum_rici)
/ pow4 (W * W - sum_rici))));
{
const int deltaj = j == cm_index;
accum += (mat[j + i * n_cols]
- * sqr ((j == fim_index[i])
+ * pow2 ((j == fim_index[i])
- deltaj
+ v[0] * deltaj));
}
if (cm_index != fim_index[i])
accum += (mat[i * n_cols + fim_index[i]]
+ mat[i * n_cols + cm_index]);
- t[2] = v[2] / (sqrt (accum - sqr (sum_fim - cm) / W) / (W - cm));
+ t[2] = v[2] / (sqrt (accum - pow2 (sum_fim - cm) / W) / (W - cm));
}
/* ASE1 for X given Y. */
{
const int deltaj = i == rm_index;
accum += (mat[j + i * n_cols]
- * sqr ((i == fmj_index[j])
+ * pow2 ((i == fmj_index[j])
- deltaj
+ v[0] * deltaj));
}
if (rm_index != fmj_index[j])
accum += (mat[j + n_cols * fmj_index[j]]
+ mat[j + n_cols * rm_index]);
- t[1] = v[1] / (sqrt (accum - sqr (sum_fmj - rm) / W) / (W - rm));
+ t[1] = v[1] / (sqrt (accum - pow2 (sum_fmj - rm) / W) / (W - rm));
}
/* Symmetric ASE0 and ASE1. */
{
int temp0 = (fmj_index[j] == i) + (fim_index[i] == j);
int temp1 = (i == rm_index) + (j == cm_index);
- accum0 += mat[j + i * n_cols] * sqr (temp0 - temp1);
+ accum0 += mat[j + i * n_cols] * pow2 (temp0 - temp1);
accum1 += (mat[j + i * n_cols]
- * sqr (temp0 + (v[0] - 1.) * temp1));
+ * pow2 (temp0 + (v[0] - 1.) * temp1));
}
ase[0] = sqrt (accum1 - 4. * W * v[0] * v[0]) / (2. * W - rm - cm);
- t[0] = v[0] / (sqrt (accum0 - sqr ((sum_fim + sum_fmj - cm - rm) / W))
+ t[0] = v[0] / (sqrt (accum0 - pow2 ((sum_fim + sum_fmj - cm - rm) / W))
/ (2. * W - rm - cm));
}
for (sum_fij2_ri = sum_fij2_ci = 0., i = 0; i < n_rows; i++)
for (j = 0; j < n_cols; j++)
{
- double temp = sqr (mat[j + i * n_cols]);
+ double temp = pow2 (mat[j + i * n_cols]);
sum_fij2_ri += temp / row_tot[i];
sum_fij2_ci += temp / col_tot[j];
}
if (entry <= 0.)
continue;
- P += entry * sqr (log (col_tot[j] * row_tot[i] / (W * entry)));
+ P += entry * pow2 (log (col_tot[j] * row_tot[i] / (W * entry)));
UXY -= entry / W * log (entry / W);
}
if (entry <= 0.)
continue;
- ase1_yx += entry * sqr (UY * log (entry / row_tot[i])
+ ase1_yx += entry * pow2 (UY * log (entry / row_tot[i])
+ (UX - UXY) * log (col_tot[j] / W));
- ase1_xy += entry * sqr (UX * log (entry / col_tot[j])
+ ase1_xy += entry * pow2 (UX * log (entry / col_tot[j])
+ (UY - UXY) * log (row_tot[i] / W));
- ase1_sym += entry * sqr ((UXY
+ ase1_sym += entry * pow2 ((UXY
* log (row_tot[i] * col_tot[j] / (W * W)))
- (UX + UY) * log (entry / W));
}
v[5] = 2. * ((UX + UY - UXY) / (UX + UY));
- ase[5] = (2. / (W * sqr (UX + UY))) * sqrt (ase1_sym);
+ ase[5] = (2. / (W * pow2 (UX + UY))) * sqrt (ase1_sym);
t[5] = v[5] / ((2. / (W * (UX + UY)))
- * sqrt (P - sqr (UX + UY - UXY) / W));
+ * sqrt (P - pow2 (UX + UY - UXY) / W));
v[6] = (UX + UY - UXY) / UX;
ase[6] = sqrt (ase1_xy) / (W * UX * UX);
- t[6] = v[6] / (sqrt (P - W * sqr (UX + UY - UXY)) / (W * UX));
+ t[6] = v[6] / (sqrt (P - W * pow2 (UX + UY - UXY)) / (W * UX));
v[7] = (UX + UY - UXY) / UY;
ase[7] = sqrt (ase1_yx) / (W * UY * UY);
- t[7] = v[7] / (sqrt (P - W * sqr (UX + UY - UXY)) / (W * UY));
+ t[7] = v[7] / (sqrt (P - W * pow2 (UX + UY - UXY)) / (W * UY));
}
/* Somers' D. */
#include <config.h>
#include <assert.h>
+#include <math.h>
#include <stdio.h>
+#include <stdlib.h>
+#include "alloc.h"
#include "command.h"
#include "error.h"
#include "expr.h"
#include "lexer.h"
+#include "moments.h"
#include "var.h"
-int
-cmd_debug_evaluate (void)
-{
- struct expression *expr;
- union value value;
- enum expr_type expr_flags;
- int dump_postfix = 0;
-
- discard_variables ();
-
- expr_flags = 0;
- if (lex_match_id ("NOOPTIMIZE"))
- expr_flags |= EXPR_NO_OPTIMIZE;
- if (lex_match_id ("POSTFIX"))
- dump_postfix = 1;
- if (token != '/')
- {
- lex_force_match ('/');
- return CMD_FAILURE;
- }
- fprintf (stderr, "%s => ", lex_rest_of_line (NULL));
- lex_get ();
-
- expr = expr_parse (EXPR_ANY | expr_flags);
- if (!expr || token != '.')
- {
- fprintf (stderr, "error\n");
- return CMD_FAILURE;
- }
-
- if (dump_postfix)
- expr_debug_print_postfix (expr);
- else
- {
- expr_evaluate (expr, NULL, 0, &value);
- switch (expr_get_type (expr))
- {
- case EXPR_NUMERIC:
- if (value.f == SYSMIS)
- fprintf (stderr, "sysmis\n");
- else
- fprintf (stderr, "%.2f\n", value.f);
- break;
-
- case EXPR_BOOLEAN:
- if (value.f == SYSMIS)
- fprintf (stderr, "sysmis\n");
- else if (value.f == 0.0)
- fprintf (stderr, "false\n");
- else
- fprintf (stderr, "true\n");
- break;
-
- case EXPR_STRING:
- fputc ('"', stderr);
- fwrite (value.c + 1, value.c[0], 1, stderr);
- fputs ("\"\n", stderr);
- break;
-
- default:
- assert (0);
- }
- }
-
- expr_free (expr);
- return CMD_SUCCESS;
-}
+++ /dev/null
-/* PSPP - computes sample statistics.
- Copyright (C) 1997-9, 2000 Free Software Foundation, Inc.
- Written by Ben Pfaff <blp@gnu.org>.
-
- This program is free software; you can redistribute it and/or
- modify it under the terms of the GNU General Public License as
- published by the Free Software Foundation; either version 2 of the
- License, or (at your option) any later version.
-
- This program is distributed in the hope that it will be useful, but
- WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- General Public License for more details.
-
- You should have received a copy of the GNU General Public License
- along with this program; if not, write to the Free Software
- Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA
- 02111-1307, USA. */
-
-/* FIXME: Many possible optimizations. */
-
-#include <config.h>
-#include "error.h"
-#include <limits.h>
-#include <math.h>
-#include <stdlib.h>
-#include "algorithm.h"
-#include "alloc.h"
-#include "bitvector.h"
-#include "command.h"
-#include "lexer.h"
-#include "error.h"
-#include "magic.h"
-#include "stats.h"
-#include "som.h"
-#include "tab.h"
-#include "var.h"
-#include "vfm.h"
-
-/* (specification)
- DESCRIPTIVES (dsc_):
- *variables=custom;
- +missing=miss:!variable/listwise,incl:!noinclude/include;
- +format=labeled:!labels/nolabels,indexed:!noindex/index,lined:!line/serial;
- +save=;
- +options[op_]=1,2,3,4,5,6,7,8;
- +statistics[st_]=all,1|mean,2|semean,5|stddev,6|variance,7|kurtosis,
- 8|skewness,9|range,10|minimum,11|maximum,12|sum,
- 13|default,14|seskewness,15|sekurtosis;
- +sort=sortby:mean/semean/stddev/variance/kurtosis/skewness/range/
- range/minimum/maximum/sum/name/seskewness/sekurtosis/!none,
- order:!a/d.
-*/
-/* (declarations) */
-/* (functions) */
-
-/* DESCRIPTIVES private data. */
-
-/* Describes properties of a distribution for the purpose of
- calculating a Z-score. */
-struct dsc_z_score
- {
- struct variable *s, *d; /* Source, destination variable. */
- double mean; /* Distribution mean. */
- double std_dev; /* Distribution standard deviation. */
- };
-
-/* DESCRIPTIVES transformation (for calculating Z-scores). */
-struct descriptives_trns
- {
- struct trns_header h;
- int n; /* Number of Z-scores. */
- struct dsc_z_score *z; /* Array of Z-scores. */
- };
-
-/* These next three vars, see comment at top of display(). */
-/* Number of cases missing listwise, even if option 5 not selected. */
-static double d_glob_miss_list;
-
-/* Number of total *cases* valid or missing, as a double. Unless
- option 5 is selected, d_glob_missing is 0. */
-static double d_glob_valid, d_glob_missing;
-
-/* Set when a weighting variable is missing or <=0. */
-static int bad_weight;
-
-/* Number of generic zvarnames we've generated in this execution. */
-static int z_count;
-
-/* Variables specified on command. */
-static struct variable **v_variables;
-static int n_variables;
-
-/* Command specifications. */
-static struct cmd_descriptives cmd;
-
-/* Whether z-scores are computed. */
-static int z_scores;
-
-/* Statistic to sort by. */
-static int sortby_stat;
-
-/* Statistics to display. */
-static unsigned long stats;
-
-/* Easier access to long-named arrays. */
-#define stat cmd.a_statistics
-#define opt cmd.a_options
-
-/* Groups of statistics. */
-#define BI BIT_INDEX
-
-#define dsc_default \
- (BI (dsc_mean) | BI (dsc_stddev) | BI (dsc_min) | BI (dsc_max))
-
-#define dsc_all \
- (BI (dsc_sum) | BI (dsc_min) | BI (dsc_max) \
- | BI (dsc_mean) | BI (dsc_semean) | BI (dsc_stddev) \
- | BI (dsc_variance) | BI (dsc_kurt) | BI (dsc_sekurt) \
- | BI (dsc_skew) | BI (dsc_seskew) | BI (dsc_range) \
- | BI (dsc_range))
-
-/* Table of options. */
-#define op_incl_miss DSC_OP_1 /* Honored. */
-#define op_no_varlabs DSC_OP_2 /* Ignored. */
-#define op_zscores DSC_OP_3 /* Honored. */
-#define op_index DSC_OP_4 /* FIXME. */
-#define op_excl_miss DSC_OP_5 /* Honored. */
-#define op_serial DSC_OP_6 /* Honored. */
-#define op_narrow DSC_OP_7 /* Ignored. */
-#define op_no_varnames DSC_OP_8 /* Honored. */
-
-/* Describes one statistic that can be calculated. */
-/* FIXME: Currently sm,col_width are not used. */
-struct dsc_info
- {
- int st_indx; /* Index into st_a_statistics[]. */
- int sb_indx; /* Sort-by index. */
- const char *s10; /* Name, stuffed into 10 columns. */
- const char *s8; /* Name, stuffed into 8 columns. */
- const char *sm; /* Name, stuffed minimally. */
- const char *s; /* Full name. */
- int max_degree; /* Highest degree necessary to calculate this
- statistic. */
- int col_width; /* Column width (not incl. spacing between columns) */
- };
-
-/* Table of statistics, indexed by dsc_*. */
-static struct dsc_info dsc_info[dsc_n_stats] =
-{
- {DSC_ST_MEAN, DSC_MEAN, N_("Mean"), N_("Mean"), N_("Mean"), N_("mean"), 1, 10},
- {DSC_ST_SEMEAN, DSC_SEMEAN, N_("S.E. Mean"), N_("S E Mean"), N_("SE"),
- N_("standard error of mean"), 2, 10},
- {DSC_ST_STDDEV, DSC_STDDEV, N_("Std Dev"), N_("Std Dev"), N_("SD"),
- N_("standard deviation"), 2, 11},
- {DSC_ST_VARIANCE, DSC_VARIANCE, N_("Variance"), N_("Variance"),
- N_("Var"), N_("variance"), 2, 12},
- {DSC_ST_KURTOSIS, DSC_KURTOSIS, N_("Kurtosis"), N_("Kurtosis"),
- N_("Kurt"), N_("kurtosis"), 4, 9},
- {DSC_ST_SEKURTOSIS, DSC_SEKURTOSIS, N_("S.E. Kurt"), N_("S E Kurt"), N_("SEKurt"),
- N_("standard error of kurtosis"), 0, 9},
- {DSC_ST_SKEWNESS, DSC_SKEWNESS, N_("Skewness"), N_("Skewness"), N_("Skew"),
- N_("skewness"), 3, 9},
- {DSC_ST_SESKEWNESS, DSC_SESKEWNESS, N_("S.E. Skew"), N_("S E Skew"), N_("SESkew"),
- N_("standard error of skewness"), 0, 9},
- {DSC_ST_RANGE, DSC_RANGE, N_("Range"), N_("Range"), N_("Rng"), N_("range"), 0, 10},
- {DSC_ST_MINIMUM, DSC_MINIMUM, N_("Minimum"), N_("Minimum"), N_("Min"),
- N_("minimum"), 0, 10},
- {DSC_ST_MAXIMUM, DSC_MAXIMUM, N_("Maximum"), N_("Maximum"), N_("Max"),
- N_("maximum"), 0, 10},
- {DSC_ST_SUM, DSC_SUM, N_("Sum"), N_("Sum"), N_("Sum"), N_("sum"), 1, 13},
-};
-
-/* Z-score functions. */
-static int generate_z_varname (struct variable * v);
-static void dump_z_table (void);
-static void run_z_pass (void);
-
-/* Procedure execution functions. */
-static int calc (struct ccase *, void *);
-static void precalc (void *);
-static void postcalc (void *);
-static void display (void);
-\f
-/* Parser and outline. */
-
-int
-cmd_descriptives (void)
-{
- struct variable *v;
- int i;
-
- v_variables = NULL;
- n_variables = 0;
-
- if (!parse_descriptives (&cmd))
- goto lossage;
-
- if (n_variables == 0)
- goto lossage;
- for (i = 0; i < n_variables; i++)
- {
- v = v_variables[i];
- v->p.dsc.dup = 0;
- v->p.dsc.zname[0] = 0;
- }
-
- if (n_variables < 0)
- {
- msg (SE, _("No variables specified."));
- goto lossage;
- }
-
- if (cmd.sbc_options && (cmd.sbc_save || cmd.sbc_format || cmd.sbc_missing))
- {
- msg (SE, _("OPTIONS may not be used with SAVE, FORMAT, or MISSING."));
- goto lossage;
- }
-
- if (!cmd.sbc_options)
- {
- if (cmd.incl == DSC_INCLUDE)
- opt[op_incl_miss] = 1;
- if (cmd.labeled == DSC_NOLABELS)
- opt[op_no_varlabs] = 1;
- if (cmd.sbc_save)
- opt[op_zscores] = 1;
- if (cmd.miss == DSC_LISTWISE)
- opt[op_excl_miss] = 1;
- if (cmd.lined == DSC_SERIAL)
- opt[op_serial] = 1;
- }
-
- /* Construct z-score varnames, show translation table. */
- if (opt[op_zscores])
- {
- z_count = 0;
- for (i = 0; i < n_variables; i++)
- {
- v = v_variables[i];
- if (v->p.dsc.dup++)
- continue;
-
- if (v->p.dsc.zname[0] == 0)
- if (!generate_z_varname (v))
- goto lossage;
- }
- dump_z_table ();
- z_scores = 1;
- }
-
- /* Figure out statistics to calculate. */
- stats = 0;
- if (stat[DSC_ST_DEFAULT] || !cmd.sbc_statistics)
- stats |= dsc_default;
- if (stat[DSC_ST_ALL])
- stats |= dsc_all;
- for (i = 0; i < dsc_n_stats; i++)
- if (stat[dsc_info[i].st_indx])
- stats |= BIT_INDEX (i);
- if (stats & dsc_kurt)
- stats |= dsc_sekurt;
- if (stats & dsc_skew)
- stats |= dsc_seskew;
-
- /* Check the sort order. */
- sortby_stat = -1;
- if (cmd.sortby == DSC_NONE)
- sortby_stat = -2;
- else if (cmd.sortby != DSC_NAME)
- {
- for (i = 0; i < n_variables; i++)
- if (dsc_info[i].sb_indx == cmd.sortby)
- {
- sortby_stat = i;
- if (!(stats & BIT_INDEX (i)))
- {
- msg (SE, _("It's not possible to sort on `%s' without "
- "displaying `%s'."),
- gettext (dsc_info[i].s), gettext (dsc_info[i].s));
- goto lossage;
- }
- }
- assert (sortby_stat != -1);
- }
-
- /* Data pass! */
- bad_weight = 0;
- procedure_with_splits (precalc, calc, postcalc, NULL);
-
- if (bad_weight)
- msg (SW, _("At least one case in the data file had a weight value "
- "that was system-missing, zero, or negative. These case(s) "
- "were ignored."));
-
- /* Z-scoring! */
- if (z_scores)
- run_z_pass ();
-
- if (v_variables)
- free (v_variables);
- return CMD_SUCCESS;
-
- lossage:
- if (v_variables)
- free (v_variables);
- return CMD_FAILURE;
-}
-
-/* Parses the VARIABLES subcommand. */
-static int
-dsc_custom_variables (struct cmd_descriptives *cmd UNUSED)
-{
- if (!lex_match_id ("VARIABLES")
- && (token != T_ID || dict_lookup_var (default_dict, tokid) == NULL)
- && token != T_ALL)
- return 2;
- lex_match ('=');
-
- while (token == T_ID || token == T_ALL)
- {
- int i, n;
-
- n = n_variables;
- if (!parse_variables (default_dict, &v_variables, &n_variables,
- PV_DUPLICATE | PV_SINGLE | PV_APPEND | PV_NUMERIC
- | PV_NO_SCRATCH))
- return 0;
- if (lex_match ('('))
- {
- if (n_variables - n > 1)
- {
- msg (SE, _("Names for z-score variables must be given for "
- "individual variables, not for groups of "
- "variables."));
- return 0;
- }
- assert (n_variables - n <= 0);
- if (token != T_ID)
- {
- msg (SE, _("Name for z-score variable expected."));
- return 0;
- }
- if (dict_lookup_var (default_dict, tokid))
- {
- msg (SE, _("Z-score variable name `%s' is a "
- "duplicate variable name with a current variable."),
- tokid);
- return 0;
- }
- for (i = 0; i < n_variables; i++)
- if (v_variables[i]->p.dsc.zname[0]
- && !strcmp (v_variables[i]->p.dsc.zname, tokid))
- {
- msg (SE, _("Z-score variable name `%s' is "
- "used multiple times."), tokid);
- return 0;
- }
- strcpy (v_variables[n_variables - 1]->p.dsc.zname, tokid);
- lex_get ();
- if (token != ')')
- {
- msg (SE, _("`)' expected after z-score variable name."));
- return 0;
- }
-
- z_scores = 1;
- }
- lex_match (',');
- }
- return 1;
-}
-\f
-/* Z scores. */
-
-/* Returns 0 if NAME is a duplicate of any existing variable name or
- of any previously-declared z-var name; otherwise returns 1. */
-static int
-try_name (char *name)
-{
- int i;
-
- if (dict_lookup_var (default_dict, name) != NULL)
- return 0;
- for (i = 0; i < n_variables; i++)
- {
- struct variable *v = v_variables[i];
- if (!strcmp (v->p.dsc.zname, name))
- return 0;
- }
- return 1;
-}
-
-static int
-generate_z_varname (struct variable * v)
-{
- char zname[10];
-
- strcpy (&zname[1], v->name);
- zname[0] = 'Z';
- zname[8] = '\0';
- if (try_name (zname))
- {
- strcpy (v->p.dsc.zname, zname);
- return 1;
- }
-
- for (;;)
- {
- /* Generate variable name. */
- z_count++;
-
- if (z_count <= 99)
- sprintf (zname, "ZSC%03d", z_count);
- else if (z_count <= 108)
- sprintf (zname, "STDZ%02d", z_count - 99);
- else if (z_count <= 117)
- sprintf (zname, "ZZZZ%02d", z_count - 108);
- else if (z_count <= 126)
- sprintf (zname, "ZQZQ%02d", z_count - 117);
- else
- {
- msg (SE, _("Ran out of generic names for Z-score variables. "
- "There are only 126 generic names: ZSC001-ZSC0999, "
- "STDZ01-STDZ09, ZZZZ01-ZZZZ09, ZQZQ01-ZQZQ09."));
- return 0;
- }
-
- if (try_name (zname))
- {
- strcpy (v->p.dsc.zname, zname);
- return 1;
- }
- }
-}
-
-static void
-dump_z_table (void)
-{
- int count;
- struct tab_table *t;
-
- {
- int i;
-
- for (count = i = 0; i < n_variables; i++)
- if (v_variables[i]->p.dsc.zname)
- count++;
- }
-
- t = tab_create (2, count + 1, 0);
- tab_title (t, 0, _("Mapping of variables to corresponding Z-scores."));
- tab_columns (t, SOM_COL_DOWN, 1);
- tab_headers (t, 0, 0, 1, 0);
- tab_box (t, TAL_1, TAL_1, TAL_0, TAL_1, 0, 0, 1, count);
- tab_hline (t, TAL_2, 0, 1, 1);
- tab_text (t, 0, 0, TAB_CENTER | TAT_TITLE, _("Source"));
- tab_text (t, 1, 0, TAB_CENTER | TAT_TITLE, _("Target"));
- tab_dim (t, tab_natural_dimensions);
-
- {
- int i, y;
-
- for (i = 0, y = 1; i < n_variables; i++)
- if (v_variables[i]->p.dsc.zname)
- {
- tab_text (t, 0, y, TAB_LEFT, v_variables[i]->name);
- tab_text (t, 1, y++, TAB_LEFT, v_variables[i]->p.dsc.zname);
- }
- }
-
- tab_submit (t);
-}
-
-/* Transformation function to calculate Z-scores. */
-static int
-descriptives_trns_proc (struct trns_header * trns, struct ccase * c,
- int case_num UNUSED)
-{
- struct descriptives_trns *t = (struct descriptives_trns *) trns;
- struct dsc_z_score *z;
- int i;
-
- for (i = t->n, z = t->z; i--; z++)
- {
- double score = c->data[z->s->fv].f;
-
- if (z->mean == SYSMIS || score == SYSMIS)
- c->data[z->d->fv].f = SYSMIS;
- else
- c->data[z->d->fv].f = (score - z->mean) / z->std_dev;
- }
- return -1;
-}
-
-/* Frees a descriptives_trns struct. */
-static void
-descriptives_trns_free (struct trns_header * trns)
-{
- struct descriptives_trns *t = (struct descriptives_trns *) trns;
-
- free (t->z);
-}
-
-/* The name is a misnomer: actually this function sets up a
- transformation by which scores can be converted into Z-scores. */
-static void
-run_z_pass (void)
-{
- struct descriptives_trns *t;
- int count, i;
-
- for (i = 0; i < n_variables; i++)
- v_variables[i]->p.dsc.dup = 0;
- for (count = i = 0; i < n_variables; i++)
- {
- if (v_variables[i]->p.dsc.dup++)
- continue;
- if (v_variables[i]->p.dsc.zname)
- count++;
- }
-
- t = xmalloc (sizeof *t);
- t->h.proc = descriptives_trns_proc;
- t->h.free = descriptives_trns_free;
- t->n = count;
- t->z = xmalloc (count * sizeof *t->z);
-
- for (i = 0; i < n_variables; i++)
- v_variables[i]->p.dsc.dup = 0;
- for (count = i = 0; i < n_variables; i++)
- {
- struct variable *v = v_variables[i];
- if (v->p.dsc.dup++ == 0 && v->p.dsc.zname[0])
- {
- char *cp;
- struct variable *d;
-
- t->z[count].s = v;
- t->z[count].d = d = dict_create_var_assert (default_dict,
- v->p.dsc.zname, 0);
- d->init = 0;
- if (v->label)
- {
- d->label = xmalloc (strlen (v->label) + 12);
- cp = stpcpy (d->label, _("Z-score of "));
- strcpy (cp, v->label);
- }
- else
- {
- d->label = xmalloc (strlen (v->name) + 12);
- cp = stpcpy (d->label, _("Z-score of "));
- strcpy (cp, v->name);
- }
- t->z[count].mean = v->p.dsc.stats[dsc_mean];
- t->z[count].std_dev = v->p.dsc.stats[dsc_stddev];
- if (t->z[count].std_dev == SYSMIS || t->z[count].std_dev == 0.0)
- t->z[count].mean = SYSMIS;
- count++;
- }
- }
-
- add_transformation ((struct trns_header *) t);
-}
-\f
-/* Statistical calculation. */
-
-static void
-precalc (void *aux UNUSED)
-{
- int i;
-
- for (i = 0; i < n_variables; i++)
- v_variables[i]->p.dsc.dup = -2;
- for (i = 0; i < n_variables; i++)
- {
- struct descriptives_proc *dsc = &v_variables[i]->p.dsc;
-
- /* Don't need to initialize more than once. */
- if (dsc->dup == -1)
- continue;
- dsc->dup = -1;
-
- dsc->valid = dsc->miss = 0.0;
- dsc->X_bar = dsc->M2 = dsc->M3 = dsc->M4 = 0.0;
- dsc->min = DBL_MAX;
- dsc->max = -DBL_MAX;
- }
- d_glob_valid = d_glob_missing = 0.0;
-}
-
-static int
-calc (struct ccase * c, void *aux UNUSED)
-{
- int i;
-
- /* Unique case identifier. */
- static int case_id;
-
- /* Get the weight for this case. */
- double weight = dict_get_case_weight (default_dict, c);
-
- if (weight <= 0.0)
- {
- weight = 0.0;
- bad_weight = 1;
- }
- case_id++;
-
- /* Handle missing values. */
- for (i = 0; i < n_variables; i++)
- {
- struct variable *v = v_variables[i];
- double X = c->data[v->fv].f;
-
- if (X == SYSMIS || (!opt[op_incl_miss] && is_num_user_missing (X, v)))
- {
- if (opt[op_excl_miss])
- {
- d_glob_missing += weight;
- return 1;
- }
- else
- {
- d_glob_miss_list += weight;
- goto iterate;
- }
- }
- }
- d_glob_valid += weight;
-
-iterate:
- for (i = 0; i < n_variables; i++)
- {
- struct descriptives_proc *inf = &v_variables[i]->p.dsc;
-
- double X, v;
- double W_old, W_new;
- double v2, v3, v4;
- double w2, w3, w4;
-
- if (inf->dup == case_id)
- continue;
- inf->dup = case_id;
-
- X = c->data[v_variables[i]->fv].f;
- if (X == SYSMIS
- || (!opt[op_incl_miss] && is_num_user_missing (X, v_variables[i])))
- {
- inf->miss += weight;
- continue;
- }
-
- /* These formulas taken from _SPSS Statistical Algorithms_. The
- names W_old, and W_new are used for W_j-1 and W_j,
- respectively, and other variables simply have the subscripts
- trimmed off, except for X_bar.
-
- I am happy that mathematical formulas may not be
- copyrighted. */
- W_old = inf->valid;
- W_new = inf->valid += weight;
- v = (weight / W_new) * (X - inf->X_bar);
- v2 = v * v;
- v3 = v2 * v;
- v4 = v3 * v;
- w2 = weight * weight;
- w3 = w2 * weight;
- w4 = w3 * weight;
- inf->M4 += (-4.0 * v * inf->M3 + 6.0 * v2 * inf->M2
- + (W_new * W_new - 3 * weight * W_old / w3) * v4 * W_old * W_new);
- inf->M3 += (-3.0 * v * inf->M2 + W_new * W_old / w2
- * (W_new - 2 * weight) * v3);
- inf->M2 += W_new * W_old / weight * v2;
- inf->X_bar += v;
- if (X < inf->min)
- inf->min = X;
- if (X > inf->max)
- inf->max = X;
- }
- return 1;
-}
-
-static void
-postcalc (void *aux UNUSED)
-{
- int i;
-
- if (opt[op_excl_miss])
- d_glob_miss_list = d_glob_missing;
-
- for (i = 0; i < n_variables; i++)
- {
- struct descriptives_proc *dsc = &v_variables[i]->p.dsc;
- double W;
-
- /* Don't duplicate our efforts. */
- if (dsc->dup == -2)
- continue;
- dsc->dup = -2;
-
- W = dsc->valid;
-
- dsc->stats[dsc_mean] = dsc->X_bar;
- dsc->stats[dsc_variance] = dsc->M2 / (W - 1);
- dsc->stats[dsc_stddev] = sqrt (dsc->stats[dsc_variance]);
- dsc->stats[dsc_semean] = dsc->stats[dsc_stddev] / sqrt (W);
- dsc->stats[dsc_min] = dsc->min == DBL_MAX ? SYSMIS : dsc->min;
- dsc->stats[dsc_max] = dsc->max == -DBL_MAX ? SYSMIS : dsc->max;
- dsc->stats[dsc_range] = ((dsc->min == DBL_MAX || dsc->max == -DBL_MAX)
- ? SYSMIS : dsc->max - dsc->min);
- dsc->stats[dsc_sum] = W * dsc->X_bar;
- if (W > 2.0 && dsc->stats[dsc_variance] >= 1e-20)
- {
- double S = dsc->stats[dsc_stddev];
- dsc->stats[dsc_skew] = (W * dsc->M3 / ((W - 1.0) * (W - 2.0) * S * S * S));
- dsc->stats[dsc_seskew] =
- sqrt (6.0 * W * (W - 1.0) / ((W - 2.0) * (W + 1.0) * (W + 3.0)));
- }
- else
- {
- dsc->stats[dsc_skew] = dsc->stats[dsc_seskew] = SYSMIS;
- }
- if (W > 3.0 && dsc->stats[dsc_variance] >= 1e-20)
- {
- double S2 = dsc->stats[dsc_variance];
- double SE_g1 = dsc->stats[dsc_seskew];
-
- dsc->stats[dsc_kurt] =
- (W * (W + 1.0) * dsc->M4 - 3.0 * dsc->M2 * dsc->M2 * (W - 1.0))
- / ((W - 1.0) * (W - 2.0) * (W - 3.0) * S2 * S2);
-
- /* Note that in _SPSS Statistical Algorithms_, the square
- root symbol is missing from this formula. */
- dsc->stats[dsc_sekurt] =
- sqrt ((4.0 * (W * W - 1.0) * SE_g1 * SE_g1) / ((W - 3.0) * (W + 5.0)));
- }
- else
- {
- dsc->stats[dsc_kurt] = dsc->stats[dsc_sekurt] = SYSMIS;
- }
- }
-
- display ();
-}
-\f
-/* Statistical display. */
-
-static algo_compare_func descriptives_compare_variables;
-
-static void
-display (void)
-{
- int i, j;
-
- int nc, n_stats;
- struct tab_table *t;
-
- /* If op_excl_miss is on, d_glob_valid and (potentially)
- d_glob_missing are nonzero, and d_glob_missing equals
- d_glob_miss_list.
-
- If op_excl_miss is off, d_glob_valid is nonzero. d_glob_missing
- is zero. d_glob_miss_list is (potentially) nonzero. */
-
- if (sortby_stat != -2)
- sort (v_variables, n_variables, sizeof *v_variables,
- descriptives_compare_variables, &cmd);
-
- for (nc = i = 0; i < dsc_n_stats; i++)
- if (stats & BIT_INDEX (i))
- nc++;
- n_stats = nc;
- if (!opt[op_no_varnames])
- nc++;
- nc += opt[op_serial] ? 2 : 1;
-
- t = tab_create (nc, n_variables + 1, 0);
- tab_headers (t, 1, 0, 1, 0);
- tab_box (t, TAL_1, TAL_1, -1, -1, 0, 0, nc - 1, n_variables);
- tab_box (t, -1, -1, -1, TAL_1, 1, 0, nc - 1, n_variables);
- tab_hline (t, TAL_2, 0, nc - 1, 1);
- tab_vline (t, TAL_2, 1, 0, n_variables);
- tab_dim (t, tab_natural_dimensions);
-
- nc = 0;
- if (!opt[op_no_varnames])
- {
- tab_text (t, nc++, 0, TAB_LEFT | TAT_TITLE, _("Variable"));
- }
- if (opt[op_serial])
- {
- tab_text (t, nc++, 0, TAB_CENTER | TAT_TITLE, _("Valid N"));
- tab_text (t, nc++, 0, TAB_CENTER | TAT_TITLE, _("Missing N"));
- } else {
- tab_text (t, nc++, 0, TAB_CENTER | TAT_TITLE, "N");
- }
-
- for (i = 0; i < dsc_n_stats; i++)
- if (stats & BIT_INDEX (i))
- {
- const char *title = gettext (dsc_info[i].s8);
- tab_text (t, nc++, 0, TAB_CENTER | TAT_TITLE, title);
- }
-
- for (i = 0; i < n_variables; i++)
- {
- struct variable *v = v_variables[i];
-
- nc = 0;
- if (!opt[op_no_varnames])
- tab_text (t, nc++, i + 1, TAB_LEFT, v->name);
- tab_text (t, nc++, i + 1, TAT_PRINTF, "%g", v->p.dsc.valid);
- if (opt[op_serial])
- tab_text (t, nc++, i + 1, TAT_PRINTF, "%g", v->p.dsc.miss);
- for (j = 0; j < dsc_n_stats; j++)
- if (stats & BIT_INDEX (j))
- tab_float (t, nc++, i + 1, TAB_NONE, v->p.dsc.stats[j], 10, 3);
- }
-
- tab_title (t, 1, _("Valid cases = %g; cases with missing value(s) = %g."),
- d_glob_valid, d_glob_miss_list);
-
- tab_submit (t);
-}
-
-/* Compares variables A and B according to the ordering specified
- by CMD. */
-static int
-descriptives_compare_variables (const void *a_, const void *b_, void *cmd_)
-{
- struct variable *const *ap = a_;
- struct variable *const *bp = b_;
- struct variable *a = *ap;
- struct variable *b = *bp;
- struct cmd_descriptives *cmd = cmd_;
-
- int result;
-
- if (cmd->sortby == DSC_NAME)
- result = strcmp (a->name, b->name);
- else
- {
- double as = a->p.dsc.stats[sortby_stat];
- double bs = b->p.dsc.stats[sortby_stat];
-
- result = as < bs ? -1 : as > bs;
- }
-
- if (cmd->order == DSC_D)
- result = -result;
-
- return result;
-}
-
-/*
- Local variables:
- mode: c
- End:
-*/
#include "julcal/julcal.h"
#include "magic.h"
#include "misc.h"
+#include "moments.h"
#include "pool.h"
#include "random.h"
-#include "stats.h"
#include "str.h"
#include "var.h"
#include "vfm.h"
case OP_CFVAR:
{
int n_args = *op++;
- int nv = 0;
- double sum[2] =
- {0.0, 0.0};
+ double weight, mean, variance;
sp -= n_args - 1;
- for (i = 0; i < n_args; i++)
- if (sp[i].f != SYSMIS)
- {
- nv++;
- sum[0] += sp[i].f;
- sum[1] += sp[i].f * sp[i].f;
- }
- if (nv < *op++)
+
+ moments_of_values (sp, n_args,
+ &weight, &mean, &variance, NULL, NULL);
+
+ if (weight < *op++ || mean == SYSMIS
+ || mean == 0 || variance == SYSMIS)
sp->f = SYSMIS;
else
- sp->f = calc_cfvar (sum, nv);
+ sp->f = sqrt (variance) / mean;
}
break;
case OP_MAX:
case OP_MEAN:
{
int n_args = *op++;
- int nv = 0;
- double sum[1] =
- {0.0};
+ double weight, mean;
sp -= n_args - 1;
- for (i = 0; i < n_args; i++)
- if (sp[i].f != SYSMIS)
- {
- nv++;
- sum[0] += sp[i].f;
- }
- if (nv < *op++)
- sp->f = SYSMIS;
- else
- sp->f = calc_mean (sum, nv);
+
+ moments_of_values (sp, n_args,
+ &weight, &mean, NULL, NULL, NULL);
+ sp->f = weight < *op++ ? SYSMIS : mean;
}
break;
case OP_MIN:
case OP_SD:
{
int n_args = *op++;
- int nv = 0;
- double sum[2];
-
- sum[0] = sum[1] = 0.0;
+ double weight, variance;
sp -= n_args - 1;
- for (i = 0; i < n_args; i++)
- if (sp[i].f != SYSMIS)
- {
- nv++;
- sum[0] += sp[i].f;
- sum[1] += sp[i].f * sp[i].f;
- }
- if (nv < *op++)
- sp->f = SYSMIS;
- else
- sp->f = calc_stddev (calc_variance (sum, nv));
+ moments_of_values (sp, n_args,
+ &weight, NULL, &variance, NULL, NULL);
+ sp->f = weight < *op++ ? SYSMIS : sqrt (variance);
}
break;
case OP_SUM:
case OP_VARIANCE:
{
int n_args = *op++;
- int nv = 0;
- double sum[2];
-
- sum[0] = sum[1] = 0.0;
+ double weight, variance;
sp -= n_args - 1;
- for (i = 0; i < n_args; i++)
- if (sp[i].f != SYSMIS)
- {
- nv++;
- sum[0] += sp[i].f;
- sum[1] += sp[i].f * sp[i].f;
- }
- if (nv < *op++)
- sp->f = SYSMIS;
- else
- sp->f = calc_variance (sum, nv);
+ moments_of_values (sp, n_args,
+ &weight, NULL, &variance, NULL, NULL);
+ sp->f = weight < *op++ ? SYSMIS : variance;
}
break;
#include "julcal/julcal.h"
#include "misc.h"
#include "pool.h"
-#include "stats.h"
#include "str.h"
#include "var.h"
type = parse_or (&(*n)->nonterm.arg[(*n)->nonterm.n]);
if (type == EXPR_ERROR)
goto fail;
+ (*n)->nonterm.n++;
if (type != EXPR_STRING)
{
msg (SE, _("Argument %d to CONCAT is type %s. All arguments "
(*n)->nonterm.n + 1, expr_type_name (type));
goto fail;
}
- (*n)->nonterm.n++;
if (!lex_match (','))
break;
goto fail;
else if (actual_type == EXPR_BOOLEAN)
actual_type = EXPR_NUMERIC;
+ nonterm->n++;
if (actual_type != wanted_type)
{
msg (SE, _("Argument %d to %s was expected to be of %s type. "
expr_type_name (actual_type), expr_type_name (wanted_type));
goto fail;
}
- nonterm->n++;
}
else if (*cp == 'f')
{
{
#include "expr.def"
};
+\f
+#include "command.h"
+
+int
+cmd_debug_evaluate (void)
+{
+ struct expression *expr;
+ union value value;
+ enum expr_type expr_flags;
+ int dump_postfix = 0;
+
+ discard_variables ();
+
+ expr_flags = 0;
+ if (lex_match_id ("NOOPTIMIZE"))
+ expr_flags |= EXPR_NO_OPTIMIZE;
+ if (lex_match_id ("POSTFIX"))
+ dump_postfix = 1;
+ if (token != '/')
+ {
+ lex_force_match ('/');
+ return CMD_FAILURE;
+ }
+ fprintf (stderr, "%s => ", lex_rest_of_line (NULL));
+ lex_get ();
+
+ expr = expr_parse (EXPR_ANY | expr_flags);
+ if (!expr || token != '.')
+ {
+ if (expr != NULL)
+ expr_free (expr);
+ fprintf (stderr, "error\n");
+ return CMD_FAILURE;
+ }
+
+ if (dump_postfix)
+ expr_debug_print_postfix (expr);
+ else
+ {
+ expr_evaluate (expr, NULL, 0, &value);
+ switch (expr_get_type (expr))
+ {
+ case EXPR_NUMERIC:
+ if (value.f == SYSMIS)
+ fprintf (stderr, "sysmis\n");
+ else
+ fprintf (stderr, "%.2f\n", value.f);
+ break;
+
+ case EXPR_BOOLEAN:
+ if (value.f == SYSMIS)
+ fprintf (stderr, "sysmis\n");
+ else if (value.f == 0.0)
+ fprintf (stderr, "false\n");
+ else
+ fprintf (stderr, "true\n");
+ break;
+
+ case EXPR_STRING:
+ fputc ('"', stderr);
+ fwrite (value.c + 1, value.c[0], 1, stderr);
+ fputs ("\"\n", stderr);
+ break;
+
+ default:
+ assert (0);
+ }
+ }
+
+ expr_free (expr);
+ return CMD_SUCCESS;
+}
#include "algorithm.h"
#include "magic.h"
#include "misc.h"
-#include "stats.h"
#include "output.h"
#include "som.h"
#include "str.h"
#include "var.h"
#include "vfm.h"
#include "alloc.h"
-#include "stats.h"
+#include "misc.h"
#include <math.h>
#include <stdlib.h>
if ( ! l->is_missing(v,var) )
{
levene_z = fabs(v->f - gs->mean);
- lz_denominator[i] += weight * sqr(levene_z - gs->lz_mean);
+ lz_denominator[i] += weight * pow2(levene_z - gs->lz_mean);
}
}
g = (struct group_statistics *) hsh_next(hash[v],&hi) )
{
- lz_numerator += g->n * sqr(g->lz_mean - lz[v].grand_mean );
+ lz_numerator += g->n * pow2(g->lz_mean - lz[v].grand_mean );
}
int intlog10 (unsigned);
+/* Returns the square of X. */
+static inline double
+pow2 (double x)
+{
+ return x * x;
+}
+
+/* Returns the cube of X. */
+static inline double
+pow3 (double x)
+{
+ return x * x * x;
+}
+
+/* Returns the fourth power of X. */
+static inline double
+pow4 (double x)
+{
+ double y = x * x;
+ y *= y;
+ return y;
+}
+
#endif /* math/misc.h */
--- /dev/null
+/* PSPP - computes sample statistics.
+ Copyright (C) 1997-9, 2000 Free Software Foundation, Inc.
+ Written by Ben Pfaff <blp@gnu.org>.
+
+ This program is free software; you can redistribute it and/or
+ modify it under the terms of the GNU General Public License as
+ published by the Free Software Foundation; either version 2 of the
+ License, or (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful, but
+ WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program; if not, write to the Free Software
+ Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA
+ 02111-1307, USA. */
+
+#include <config.h>
+#include "moments.h"
+#include <assert.h>
+#include <math.h>
+#include <stdlib.h>
+#include "alloc.h"
+#include "misc.h"
+#include "val.h"
+
+/* FIXME? _SPSS Statistical Algorithms_ in the DESCRIPTIVES
+ second describes a "provisional means algorithm" that might be
+ useful for improving accuracy when we only do one pass. */
+
+/* A set of moments in process of calculation. */
+struct moments
+ {
+ enum moment max_moment; /* Highest-order moment we're computing. */
+ int pass; /* Current pass (1 or 2). */
+
+ /* Pass one. */
+ double w1; /* Total weight for pass 1, so far. */
+ double sum; /* Sum of values so far. */
+ double mean; /* Mean = sum / w1. */
+
+ /* Pass two. */
+ double w2; /* Total weight for pass 2, so far. */
+ double d1; /* Sum of deviations from the mean. */
+ double d2; /* Sum of squared deviations from the mean. */
+ double d3; /* Sum of cubed deviations from the mean. */
+ double d4; /* Sum of (deviations from the mean)**4. */
+ };
+
+/* Initializes moments M for calculating moment MAX_MOMENT and
+ lower moments. */
+static void
+init_moments (struct moments *m, enum moment max_moment)
+{
+ assert (m != NULL);
+ assert (max_moment == MOMENT_MEAN || max_moment == MOMENT_VARIANCE
+ || max_moment == MOMENT_SKEWNESS || max_moment == MOMENT_KURTOSIS);
+ m->max_moment = max_moment;
+ moments_clear (m);
+}
+
+/* Clears out a set of moments so that it can be reused for a new
+ set of values. The moments to be calculated are not changed. */
+void
+moments_clear (struct moments *m)
+{
+ m->pass = 1;
+ m->w1 = m->w2 = 0.;
+ m->sum = 0.;
+}
+
+/* Creates and returns a data structure for calculating moment
+ MAX_MOMENT and lower moments on a data series. For greatest
+ accuracy, the user should call moments_pass_one() for each
+ value in the series, then call moments_pass_two() for the same
+ set of values in the same order, then call moments_calculate()
+ to obtain the moments. At a cost of reduced accuracy, the
+ first pass can be skipped. In either case, moments_destroy()
+ should be called when the moments are no longer needed. */
+struct moments *
+moments_create (enum moment max_moment)
+{
+ struct moments *m = xmalloc (sizeof *m);
+ init_moments (m, max_moment);
+ return m;
+}
+
+/* Adds VALUE with the given WEIGHT to the calculation of
+ moments for the first pass. */
+void
+moments_pass_one (struct moments *m, double value, double weight)
+{
+ assert (m != NULL);
+ assert (m->pass == 1);
+
+ if (value != SYSMIS && weight >= 0.)
+ {
+ m->sum += value * weight;
+ m->w1 += weight;
+ }
+}
+
+/* Adds VALUE with the given WEIGHT to the calculation of
+ moments for the second pass. */
+void
+moments_pass_two (struct moments *m, double value, double weight)
+{
+ double d, d_power;
+
+ assert (m != NULL);
+
+ if (m->pass == 1)
+ {
+ m->pass = 2;
+ m->mean = m->w1 != 0. ? m->sum / m->w1 : 0.;
+ m->d1 = m->d2 = m->d3 = m->d4 = 0.;
+ }
+
+ if (value != SYSMIS && weight >= 0.)
+ {
+ m->w2 += weight;
+
+ d = d_power = value - m->mean;
+ m->d1 += d_power * weight;
+
+ if (m->max_moment >= MOMENT_VARIANCE)
+ {
+ d_power *= d;
+ m->d2 += d_power * weight;
+
+ if (m->max_moment >= MOMENT_SKEWNESS)
+ {
+ d_power *= d;
+ m->d3 += d_power * weight;
+
+ if (m->max_moment >= MOMENT_KURTOSIS)
+ {
+ d_power *= d;
+ m->d4 += d_power * weight;
+ }
+ }
+ }
+ }
+}
+
+/* Calculates moments based on the input data. Stores the total
+ weight in *WEIGHT, the mean in *MEAN, the variance in
+ *VARIANCE, the skewness in *SKEWNESS, and the kurtosis in
+ *KURTOSIS. Any of these result parameters may be null
+ pointers, in which case the values are not calculated. If any
+ result cannot be calculated, either because they are undefined
+ based on the input data or because their moments are higher
+ than the maximum requested on moments_create(), then SYSMIS is
+ stored into that result. */
+void
+moments_calculate (const struct moments *m,
+ double *weight,
+ double *mean, double *variance,
+ double *skewness, double *kurtosis)
+{
+ double W;
+ int one_pass;
+
+ assert (m != NULL);
+ assert (m->pass == 2);
+
+ one_pass = m->w1 == 0.;
+
+ /* If passes 1 and 2 are used, then w1 and w2 must agree. */
+ assert (one_pass || m->w1 == m->w2);
+
+ if (mean != NULL)
+ *mean = SYSMIS;
+ if (variance != NULL)
+ *variance = SYSMIS;
+ if (skewness != NULL)
+ *skewness = SYSMIS;
+ if (kurtosis != NULL)
+ *kurtosis = SYSMIS;
+
+ W = m->w2;
+ if (weight != NULL)
+ *weight = W;
+ if (W == 0.)
+ return;
+
+ if (mean != NULL)
+ *mean = m->mean + m->d1 / W;
+
+ if (m->max_moment >= MOMENT_VARIANCE && W > 1.)
+ {
+ double variance_tmp;
+
+ /* From _Numerical Recipes in C_, 2nd ed., 0-521-43108-5,
+ section 14.1. */
+ if (variance == NULL)
+ variance = &variance_tmp;
+ *variance = (m->d2 - pow2 (m->d1) / W) / (W - 1.);
+
+ /* From _SPSS Statistical Algorithms, 2nd ed.,
+ 0-918469-89-9, section "DESCRIPTIVES". */
+ if (fabs (*variance) >= 1e-20)
+ {
+ if (m->max_moment >= MOMENT_SKEWNESS && skewness != NULL && W > 2.)
+ {
+ *skewness = ((W * m->d3 - 3.0 * m->d1 * m->d2 + 2.0
+ * pow3 (m->d1) / W)
+ / ((W - 1.0) * (W - 2.0)
+ * *variance * sqrt (*variance)));
+ if (!finite (*skewness))
+ *skewness = SYSMIS;
+ }
+ if (m->max_moment >= MOMENT_KURTOSIS && kurtosis != NULL && W > 3.)
+ {
+ *kurtosis = (((W + 1) * (W * m->d4
+ - 4.0 * m->d1 * m->d3
+ + 6.0 * pow2 (m->d1) * m->d2 / W
+ - 3.0 * pow4 (m->d1) / pow2 (W)))
+ / ((W - 1.0) * (W - 2.0) * (W - 3.0)
+ * pow2 (*variance))
+ - (3.0 * pow2 (W - 1.0))
+ / ((W - 2.0) * (W - 3.)));
+ if (!finite (*kurtosis))
+ *kurtosis = SYSMIS;
+ }
+ }
+ }
+}
+
+/* Destroys a set of moments. */
+void
+moments_destroy (struct moments *m)
+{
+ free (m);
+}
+
+/* Calculates the requested moments on the CNT values in ARRAY.
+ Each value is given a weight of 1. The total weight is stored
+ into *WEIGHT (trivially) and the mean, variance, skewness, and
+ kurtosis are stored into *MEAN, *VARIANCE, *SKEWNESS, and
+ *KURTOSIS, respectively. Any of the result pointers may be
+ null, in which case no value is stored. */
+void
+moments_of_doubles (const double *array, size_t cnt,
+ double *weight,
+ double *mean, double *variance,
+ double *skewness, double *kurtosis)
+{
+ enum moment max_moment;
+ struct moments m;
+ size_t idx;
+
+ if (kurtosis != NULL)
+ max_moment = MOMENT_KURTOSIS;
+ else if (skewness != NULL)
+ max_moment = MOMENT_SKEWNESS;
+ else if (variance != NULL)
+ max_moment = MOMENT_VARIANCE;
+ else
+ max_moment = MOMENT_MEAN;
+
+ init_moments (&m, max_moment);
+ for (idx = 0; idx < cnt; idx++)
+ moments_pass_one (&m, array[idx], 1.);
+ for (idx = 0; idx < cnt; idx++)
+ moments_pass_two (&m, array[idx], 1.);
+ moments_calculate (&m, weight, mean, variance, skewness, kurtosis);
+}
+
+/* Calculates the requested moments on the CNT numeric values in
+ ARRAY. Each value is given a weight of 1. The total weight
+ is stored into *WEIGHT (trivially) and the mean, variance,
+ skewness, and kurtosis are stored into *MEAN, *VARIANCE,
+ *SKEWNESS, and *KURTOSIS, respectively. Any of the result
+ pointers may be null, in which case no value is stored. */
+void
+moments_of_values (const union value *array, size_t cnt,
+ double *weight,
+ double *mean, double *variance,
+ double *skewness, double *kurtosis)
+{
+ enum moment max_moment;
+ struct moments m;
+ size_t idx;
+
+ if (kurtosis != NULL)
+ max_moment = MOMENT_KURTOSIS;
+ else if (skewness != NULL)
+ max_moment = MOMENT_SKEWNESS;
+ else if (variance != NULL)
+ max_moment = MOMENT_VARIANCE;
+ else
+ max_moment = MOMENT_MEAN;
+
+ init_moments (&m, max_moment);
+ for (idx = 0; idx < cnt; idx++)
+ moments_pass_one (&m, array[idx].f, 1.);
+ for (idx = 0; idx < cnt; idx++)
+ moments_pass_two (&m, array[idx].f, 1.);
+ moments_calculate (&m, weight, mean, variance, skewness, kurtosis);
+}
+
+/* Returns the standard error of the skewness for the given total
+ weight W.
+
+ From _SPSS Statistical Algorithms, 2nd ed., 0-918469-89-9,
+ section "DESCRIPTIVES". */
+double
+calc_seskew (double W)
+{
+ return sqrt ((6. * W * (W - 1.)) / ((W - 2.) * (W + 1.) * (W + 3.)));
+}
+
+/* Returns the standard error of the kurtosis for the given total
+ weight W.
+
+ From _SPSS Statistical Algorithms, 2nd ed., 0-918469-89-9,
+ section "DESCRIPTIVES", except that the sqrt symbol is omitted
+ there. */
+double
+calc_sekurt (double W)
+{
+ return sqrt ((4. * (pow2 (W) - 1.) * pow2 (calc_seskew (W)))
+ / ((W - 3.) * (W + 5.)));
+}
+\f
+#include <stdio.h>
+#include "command.h"
+#include "lexer.h"
+
+static int
+read_values (double **values, double **weights, size_t *cnt)
+{
+ size_t cap = 0;
+
+ *values = NULL;
+ *weights = NULL;
+ *cnt = 0;
+ while (token == T_NUM)
+ {
+ double value = tokval;
+ double weight = 1.;
+ lex_get ();
+ if (lex_match ('*'))
+ {
+ if (token != T_NUM)
+ {
+ lex_error (_("expecting weight value"));
+ return 0;
+ }
+ weight = tokval;
+ lex_get ();
+ }
+
+ if (*cnt >= cap)
+ {
+ cap = 2 * (cap + 8);
+ *values = xrealloc (*values, sizeof **values * cap);
+ *weights = xrealloc (*weights, sizeof **weights * cap);
+ }
+
+ (*values)[*cnt] = value;
+ (*weights)[*cnt] = weight;
+ (*cnt)++;
+ }
+
+ return 1;
+}
+
+int
+cmd_debug_moments (void)
+{
+ int retval = CMD_FAILURE;
+ struct moments *m = NULL;
+ double *values = NULL;
+ double *weights = NULL;
+ double weight, M[4];
+ int two_pass = 1;
+ size_t cnt;
+ size_t i;
+
+ if (lex_match_id ("ONEPASS"))
+ two_pass = 0;
+ if (token != '/')
+ {
+ lex_force_match ('/');
+ goto done;
+ }
+ fprintf (stderr, "%s => ", lex_rest_of_line (NULL));
+ lex_get ();
+
+ m = moments_create (MOMENT_KURTOSIS);
+ if (!read_values (&values, &weights, &cnt))
+ goto done;
+ if (two_pass)
+ {
+ for (i = 0; i < cnt; i++)
+ moments_pass_one (m, values[i], weights[i]);
+ }
+ for (i = 0; i < cnt; i++)
+ moments_pass_two (m, values[i], weights[i]);
+ moments_calculate (m, &weight, &M[0], &M[1], &M[2], &M[3]);
+
+ fprintf (stderr, "W=%.3f", weight);
+ for (i = 0; i < 4; i++)
+ {
+ fprintf (stderr, " M%d=", i + 1);
+ if (M[i] == SYSMIS)
+ fprintf (stderr, "sysmis");
+ else if (fabs (M[i]) <= 0.0005)
+ fprintf (stderr, "0.000");
+ else
+ fprintf (stderr, "%.3f", M[i]);
+ }
+ fprintf (stderr, "\n");
+
+ retval = lex_end_of_command ();
+
+ done:
+ moments_destroy (m);
+ free (values);
+ free (weights);
+ return retval;
+}
--- /dev/null
+/* PSPP - computes sample statistics.
+ Copyright (C) 2004 Free Software Foundation, Inc.
+ Written by Ben Pfaff <blp@gnu.org>.
+
+ This program is free software; you can redistribute it and/or
+ modify it under the terms of the GNU General Public License as
+ published by the Free Software Foundation; either version 2 of the
+ License, or (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful, but
+ WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program; if not, write to the Free Software
+ Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA
+ 02111-1307, USA. */
+
+#ifndef HEADER_MOMENTS
+#define HEADER_MOMENTS
+
+#include <stddef.h>
+#include "val.h"
+
+/* Moments of the mean.
+ Higher-order moments have higher values. */
+enum moment
+ {
+ MOMENT_NONE, /* No moments. */
+ MOMENT_MEAN, /* First-order moment. */
+ MOMENT_VARIANCE, /* Second-order moment. */
+ MOMENT_SKEWNESS, /* Third-order moment. */
+ MOMENT_KURTOSIS /* Fourth-order moment. */
+ };
+
+struct moments;
+
+struct moments *moments_create (enum moment max_moment);
+void moments_clear (struct moments *);
+void moments_pass_one (struct moments *, double value, double weight);
+void moments_pass_two (struct moments *, double value, double weight);
+void moments_calculate (const struct moments *,
+ double *weight,
+ double *mean, double *variance,
+ double *skewness, double *kurtosis);
+void moments_destroy (struct moments *);
+
+void moments_of_doubles (const double *array, size_t cnt,
+ double *weight,
+ double *mean, double *variance,
+ double *skewness, double *kurtosis);
+void moments_of_values (const union value *array, size_t cnt,
+ double *weight,
+ double *mean, double *variance,
+ double *skewness, double *kurtosis);
+
+double calc_seskew (double weight);
+double calc_sekurt (double weight);
+
+#endif /* moments.h */
#include <errno.h>
#include "algorithm.h"
#include "alloc.h"
+#include "casefile.h"
#include "command.h"
#include "error.h"
#include "expr.h"
#include "var.h"
#include "vfm.h"
#include "vfmP.h"
+#include "workspace.h"
#if HAVE_UNISTD_H
#include <unistd.h>
/* Other prototypes. */
static int compare_record (const union value *, const union value *,
const struct sort_cases_pgm *, int *idx_to_fv);
-static int compare_case_lists (const void *, const void *, void *);
+static int compare_cases (const struct ccase *, const struct ccase *, void *);
+static int compare_case_dblptrs (const void *, const void *, void *);
static struct internal_sort *do_internal_sort (struct sort_cases_pgm *,
int separate);
static void destroy_internal_sort (struct internal_sort *);
}
/* Sorts the active file based on the key variables specified in
- global variables vars and var_cnt. The output is either to the
- active file, if SEPARATE is zero, or to a separate file, if
- SEPARATE is nonzero. In the latter case the output cases can be
- read with a call to read_sort_output(). (In the former case the
- output cases should be dealt with through the usual vfm interface.)
+ global variables vars and var_cnt.
- The caller is responsible for freeing vars[]. */
+ If SEPARATE is zero, then output goes to the active file. The
+ output cases can be read through the usual VFM interfaces.
+
+ If SEPARATE is nonzero, then output goes to a separate file.
+ The output cases can be read with a call to
+ read_sort_output().
+
+ The caller is responsible for freeing SCP. */
int
sort_cases (struct sort_cases_pgm *scp, int separate)
{
return 1;
/* Fall back to an external sort. */
- if (vfm_source != NULL
- && case_source_is_class (vfm_source, &storage_source_class))
- storage_source_to_disk (vfm_source);
scp->xsrt = do_external_sort (scp, separate);
if (scp->xsrt != NULL)
return 1;
return 0;
}
\f
-/* Results of an internal sort. */
+/* Results of an internal sort.
+ Used only for sorting to a separate file. */
struct internal_sort
{
- struct case_list **results;
+ const struct ccase **cases;
+ size_t case_cnt;
};
/* If the data is in memory, do an internal sort. Return
struct internal_sort *isrt;
isrt = xmalloc (sizeof *isrt);
- isrt->results = NULL;
+ isrt->cases = NULL;
+ isrt->case_cnt = 0;
- if (case_source_is_class (vfm_source, &storage_source_class)
- && !storage_source_on_disk (vfm_source))
+ if (case_source_is_class (vfm_source, &storage_source_class))
{
- struct case_list *case_list;
- struct case_list **case_array;
- int case_cnt;
- int i;
+ struct casefile *casefile = storage_source_get_casefile (vfm_source);
- case_cnt = vfm_source->class->count (vfm_source);
- if (case_cnt <= 0)
- return isrt;
-
- if (case_cnt > get_max_workspace() / sizeof *case_array)
- goto error;
-
- case_list = storage_source_get_cases (vfm_source);
- case_array = malloc (sizeof *case_array * (case_cnt + 1));
- if (case_array == NULL)
- goto error;
-
- for (i = 0; case_list != NULL; i++)
+ if (!separate)
{
- case_array[i] = case_list;
- case_list = case_list->next;
+ if (!casefile_sort (casefile, compare_cases, scp))
+ goto error;
}
- assert (i == case_cnt);
- case_array[case_cnt] = NULL;
+ else
+ {
+ /* FIXME FIXME FIXME.
+ This is crap because the casefile could get flushed
+ to disk between the time we sort it and we use it
+ later, causing invalid pointer accesses.
+ The right solution is probably to extend casefiles
+ to support duplication. */
+ struct casereader *reader;
+ size_t case_idx;
+
+ if (!casefile_in_core (casefile))
+ goto error;
+
+ isrt->case_cnt = casefile_get_case_cnt (casefile);
+ isrt->cases = workspace_malloc (sizeof *isrt->cases
+ * isrt->case_cnt);
+ if (isrt->cases == NULL)
+ goto error;
- sort (case_array, case_cnt, sizeof *case_array,
- compare_case_lists, scp);
+ reader = casefile_get_reader (casefile);
+ for (case_idx = 0; case_idx < isrt->case_cnt; case_idx++)
+ {
+ casereader_read (reader, &isrt->cases[case_idx]);
+ assert (isrt->cases[case_idx] != NULL);
+ }
+ casereader_destroy (reader);
- if (!separate)
- {
- storage_source_set_cases (vfm_source, case_array[0]);
- for (i = 1; i <= case_cnt; i++)
- case_array[i - 1]->next = case_array[i];
- free (case_array);
+ sort (isrt->cases, isrt->case_cnt, casefile_get_case_size (casefile),
+ compare_case_dblptrs, scp);
}
- else
- isrt->results = case_array;
-
+
return isrt;
}
-
+
error:
free (isrt);
return NULL;
{
if (isrt != NULL)
{
- free (isrt->results);
+ workspace_free (isrt->cases, sizeof *isrt->cases * isrt->case_cnt);
free (isrt);
}
}
-/* Compares the VAR_CNT variables in VARS[] between the
- `case_list's at A and B, and returns a strcmp()-type
- result. */
+/* Compares the variables specified by SCP between the cases at A
+ and B, and returns a strcmp()-type result. */
static int
-compare_case_lists (const void *a_, const void *b_, void *scp_)
+compare_cases (const struct ccase *a, const struct ccase *b,
+ void *scp_)
{
struct sort_cases_pgm *scp = scp_;
- struct case_list *const *pa = a_;
- struct case_list *const *pb = b_;
- struct case_list *a = *pa;
- struct case_list *b = *pb;
- return compare_record (a->c.data, b->c.data, scp, NULL);
+ return compare_record (a->data, b->data, scp, NULL);
+}
+
+/* Compares the variables specified by SCP between the cases at A
+ and B, and returns a strcmp()-type result. */
+static int
+compare_case_dblptrs (const void *a_, const void *b_, void *scp_)
+{
+ struct sort_cases_pgm *scp = scp_;
+ struct ccase *const *pa = a_;
+ struct ccase *const *pb = b_;
+ struct ccase *a = *pa;
+ struct ccase *b = *pb;
+
+ return compare_record (a->data, b->data, scp, NULL);
}
\f
/* External sort. */
struct external_sort *xsrt;
int success = 0;
+ if (vfm_source != NULL
+ && case_source_is_class (vfm_source, &storage_source_class))
+ casefile_to_disk (storage_source_get_casefile (vfm_source));
+
xsrt = xmalloc (sizeof *xsrt);
xsrt->scp = scp;
if (!init_external_sort (xsrt))
read_sort_output_func *output_func,
void *aux)
{
- struct case_list **p;
+ size_t case_idx;
- for (p = isrt->results; *p; p++)
- if (!output_func (&(*p)->c, aux))
+ for (case_idx = 0; case_idx < isrt->case_cnt; case_idx++)
+ if (!output_func (isrt->cases[case_idx], aux))
break;
- free (isrt->results);
}
static void
+++ /dev/null
-/* PSPP - computes sample statistics.
- Copyright (C) 1997-9, 2000 Free Software Foundation, Inc.
- Written by Ben Pfaff <blp@gnu.org>.
-
- This program is free software; you can redistribute it and/or
- modify it under the terms of the GNU General Public License as
- published by the Free Software Foundation; either version 2 of the
- License, or (at your option) any later version.
-
- This program is distributed in the hope that it will be useful, but
- WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- General Public License for more details.
-
- You should have received a copy of the GNU General Public License
- along with this program; if not, write to the Free Software
- Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA
- 02111-1307, USA. */
-
-#include <config.h>
-#include "stats.h"
-#include <math.h>
-
-/* Returns the fourth power of its argument. */
-double
-pow4 (double x)
-{
- x *= x;
- return x * x;
-}
-
-/* Returns the cube of its argument. */
-double
-cube (double x)
-{
- return x * x * x;
-}
-
-/* Returns the square of its argument. */
-double
-sqr (double x)
-{
- return x * x;
-}
-
-/*
- * kurtosis = [(n+1){n*sum(X**4) - 4*sum(X)*sum(X**3)
- * + 6*sum(X)**2*sum(X**2)/n - 3*sum(X)**4/n**2}]
- * /[(n-1)(n-2)(n-3)*(variance)**2]
- * -[3*{(n-1)**2}]
- * /[(n-2)(n-3)]
- *
- * This and other formulas from _Biometry_, Sokal and Rohlf,
- * W. H. Freeman and Company, 1969. See pages 117 and 136 especially.
- */
-double
-calc_kurt (const double d[4], double n, double variance)
-{
- return
- (((n + 1) * (n * d[3]
- - 4.0 * d[0] * d[2]
- + 6.0 * sqr (d[0]) * d[1] / n
- - 3.0 * pow4 (d[0]) / sqr (n)))
- / ((n - 1.0) * (n - 2.0) * (n - 3.0) * sqr (variance))
- - (3.0 * sqr (n - 1.0))
- / ((n - 2.0) * (n - 3.)));
-}
-
-/*
- * standard error of kurtosis = sqrt([24n((n-1)**2)]/[(n-3)(n-2)(n+3)(n+5)])
- */
-double
-calc_sekurt (double n)
-{
- return sqrt ((24.0 * n * sqr (n - 1.0))
- / ((n - 3.0) * (n - 2.0) * (n + 3.0) * (n + 5.0)));
-}
-
-/*
- * skewness = [n*sum(X**3) - 3*sum(X)*sum(X**2) + 2*sum(X)**3/n]/
- * /[(n-1)(n-2)*(variance)**3]
- */
-double
-calc_skew (const double d[3], double n, double stddev)
-{
- return
- ((n * d[2] - 3.0 * d[0] * d[1] + 2.0 * cube (d[0]) / n)
- / ((n - 1.0) * (n - 2.0) * cube (stddev)));
-}
-
-/*
- * standard error of skewness = sqrt([6n(n-1)] / [(n-2)(n+1)(n+3)])
- */
-double
-calc_seskew (double n)
-{
- return
- sqrt ((6.0 * n * (n - 1.0))
- / ((n - 2.0) * (n + 1.0) * (n + 3.0)));
-}
-
-/* Returns one-sided significance level corresponding to standard
- normal deviate X. Algorithm from _SPSS Statistical Algorithms_,
- Appendix 1. */
-#if 0
-double
-normal_sig (double x)
-{
- const double a1 = .070523078;
- const double a2 = .0422820123;
- const double a3 = .0092705272;
- const double a4 = .0001520143;
- const double a5 = .0002765672;
- const double a6 = .0000430638;
-
- const double z = fabs (x) <= 14.14 ? 0.7071067812 * fabs (x) : 10.;
- double r;
-
- r = 1. + z * (a1 + z * (a2 + z * (a3 + z * (a4 + z * (a5 + z * a6)))));
- r *= r; /* r ** 2 */
- r *= r; /* r ** 4 */
- r *= r; /* r ** 16 */
-
- return .5 / r;
-}
-#else /* 1 */
-/* Taken from _BASIC Statistics: An Introduction to Problem Solving
- with Your Personal Computer_, Jerry W. O'Dell, TAB 1984, page 314-5. */
-double
-normal_sig (double z)
-{
- double h;
-
- h = 1 + 0.0498673470 * z;
- z *= z;
- h += 0.0211410061 * z;
- z *= z;
- h += 0.0032776263 * z;
- z *= z;
- h += 0.0000380036 * z;
- z *= z;
- h += 0.0000488906 * z;
- z *= z;
- h += 0.0000053830 * z;
- return pow (h, -16.) / 2.;
-}
-#endif /* 1 */
-
-/* Algorithm from _Turbo Pascal Programmer's Toolkit_, Rugg and
- Feldman, Que 1989. Returns the significance level of chi-square
- value CHISQ with DF degrees of freedom, correct to at least 7
- decimal places. */
-double
-chisq_sig (double x, int k)
-{
- if (x <= 0. || k < 1)
- return 1.0;
- else if (k == 1)
- return 2. * normal_sig (sqrt (x));
- else if (k <= 30)
- {
- double z, z_partial, term, denom, numerator, value;
-
- z = 1.;
- z_partial = 1.;
- term = k;
- do
- {
- term += 2;
- z_partial *= x / term;
- if (z_partial >= 10000000.)
- return 0.;
- z += z_partial;
- }
- while (z_partial >= 1.e-7);
- denom = term = 2 - k % 2;
- while (term < k)
- {
- term += 2;
- denom *= term;
- }
- if (k % 2)
- {
- value = ((k + 1) / 2) * log (x) - x / 2.;
- numerator = exp (value) * sqrt (2. / x / PI);
- }
- else
- {
- value = k / 2. * log (x) - x / 2.;
- numerator = exp (value);
- }
- return 1. - numerator * z / denom;
- }
- else
- {
- double term, numer, norm_x;
-
- term = 2. / 9. / k;
- numer = pow (x / k, 1. / 3.);
- norm_x = numer / sqrt (term);
- return 1.0 - normal_sig (norm_x);
- }
-}
+++ /dev/null
-/* PSPP - computes sample statistics.
- Copyright (C) 1997-9, 2000 Free Software Foundation, Inc.
- Written by Ben Pfaff <blp@gnu.org>.
-
- This program is free software; you can redistribute it and/or
- modify it under the terms of the GNU General Public License as
- published by the Free Software Foundation; either version 2 of the
- License, or (at your option) any later version.
-
- This program is distributed in the hope that it will be useful, but
- WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- General Public License for more details.
-
- You should have received a copy of the GNU General Public License
- along with this program; if not, write to the Free Software
- Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA
- 02111-1307, USA. */
-
-#if !statistics_h
-#define statistics_h 1
-
-/* These are all sample statistics except for mean since uses
- population statistics for whatever reason. */
-
-/* Define pi to the maximum precision available. */
-#include <math.h> /* defines M_PI on many systems */
-#ifndef PI
-#ifdef M_PI
-#define PI M_PI
-#else /* !PI && !M_PI */
-#define PI 3.14159265358979323846264338327
-#endif /* !PI && !M_PI */
-#endif /* !PI */
-
-extern double pow4 (double);
-extern double cube (double);
-extern double sqr (double);
-
-/* Returns the fourth power of its argument. */
-extern inline double
-pow4 (double x)
-{
- x *= x;
- return x * x;
-}
-
-/* Returns the cube of its argument. */
-extern inline double
-cube (double x)
-{
- return x * x * x;
-}
-
-/* Returns the square of its argument. */
-extern inline double
-sqr (double x)
-{
- return x * x;
-}
-
-/* Mean, standard error of mean. */
-#define calc_mean(D, N) \
- ((D)[0] / (N))
-#define calc_semean(STDDEV, N) \
- ((STDDEV) / sqrt (N))
-
-/* Variance, standard deviation, coefficient of variance. */
-#define calc_variance(D, N) \
- ( ((D)[1] - sqr ((D)[0])/(N)) / ((N)-1) )
-#define calc_stddev(VARIANCE) \
- (sqrt (VARIANCE))
-#define calc_cfvar(D, N) \
- ( calc_stddev (calc_variance (D, N)) / calc_mean (D, N) )
-
-/* Kurtosis, standard error of kurtosis. */
-double calc_kurt (const double d[4], double n, double variance);
-double calc_sekurt (double n);
-
-/* Skewness, standard error of skewness. */
-double calc_skew (const double d[3], double n, double stddev);
-double calc_seskew (double n);
-
-/* Significance. */
-double normal_sig (double x);
-double chisq_sig (double chisq, int df);
-
-#endif /* !statistics_h */
#include "lexer.h"
#include "error.h"
#include "magic.h"
+#include "misc.h"
#include "tab.h"
#include "som.h"
#include "value-labels.h"
#include "var.h"
#include "vfm.h"
#include "hash.h"
-#include "stats.h"
#include "t-test.h"
#include "levene.h"
df = gs0->n + gs1->n - 2.0 ;
tab_float (self->t, 5, i*2+3, TAB_RIGHT, df, 2, 0);
- pooled_variance = ( (gs0->n )*sqr(gs0->s_std_dev)
+ pooled_variance = ( (gs0->n )*pow2(gs0->s_std_dev)
+
- (gs1->n )*sqr(gs1->s_std_dev)
+ (gs1->n )*pow2(gs1->s_std_dev)
) / df ;
t = (gs0->mean - gs1->mean) / sqrt(pooled_variance) ;
tab_float(self->t, 7, i*2+3, TAB_RIGHT, mean_diff, 8, 3);
- std_err_diff = sqrt( sqr(gs0->se_mean) + sqr(gs1->se_mean));
+ std_err_diff = sqrt( pow2(gs0->se_mean) + pow2(gs1->se_mean));
tab_float(self->t, 8, i*2+3, TAB_RIGHT, std_err_diff, 8, 3);
TAB_LEFT, _("Equal variances not assumed"));
- se2 = (sqr(gs0->s_std_dev)/(gs0->n -1) ) +
- (sqr(gs1->s_std_dev)/(gs1->n -1) );
+ se2 = (pow2(gs0->s_std_dev)/(gs0->n -1) ) +
+ (pow2(gs1->s_std_dev)/(gs1->n -1) );
t = mean_diff / sqrt(se2) ;
tab_float (self->t, 4, i*2+3+1, TAB_RIGHT, t, 8, 3);
- df = sqr(se2) / (
- (sqr(sqr(gs0->s_std_dev)/(gs0->n - 1 ))
+ df = pow2(se2) / (
+ (pow2(pow2(gs0->s_std_dev)/(gs0->n - 1 ))
/(gs0->n -1 )
)
+
- (sqr(sqr(gs1->s_std_dev)/(gs1->n - 1 ))
+ (pow2(pow2(gs1->s_std_dev)/(gs1->n - 1 ))
/(gs1->n -1 )
)
) ;
t = (pairs[i].mean[0] - pairs[i].mean[1])
/ sqrt (
- ( sqr (pairs[i].s_std_dev[0]) + sqr (pairs[i].s_std_dev[1]) -
+ ( pow2 (pairs[i].s_std_dev[0]) + pow2 (pairs[i].s_std_dev[1]) -
2 * pairs[i].correlation *
pairs[i].s_std_dev[0] * pairs[i].s_std_dev[1] )
/ (n - 1)
double correlation_t =
pairs[i].correlation * sqrt(df) /
- sqrt(1 - sqr(pairs[i].correlation));
+ sqrt(1 - pow2(pairs[i].correlation));
/* row headings */
pairs[i].sum[0] += weight * val0->f;
pairs[i].sum[1] += weight * val1->f;
- pairs[i].ssq[0] += weight * sqr(val0->f);
- pairs[i].ssq[1] += weight * sqr(val1->f);
+ pairs[i].ssq[0] += weight * pow2(val0->f);
+ pairs[i].ssq[1] += weight * pow2(val1->f);
pairs[i].sum_of_prod += weight * val0->f * val1->f ;
pairs[i].sum_of_diffs += weight * ( val0->f - val1->f ) ;
- pairs[i].ssq_diffs += weight * sqr(val0->f - val1->f);
+ pairs[i].ssq_diffs += weight * pow2(val0->f - val1->f);
}
}
{
pairs[i].mean[j] = pairs[i].sum[j] / n ;
pairs[i].s_std_dev[j] = sqrt((pairs[i].ssq[j] / n -
- sqr(pairs[i].mean[j]))
+ pow2(pairs[i].mean[j]))
);
pairs[i].std_dev[j] = sqrt(n/(n-1)*(pairs[i].ssq[j] / n -
- sqr(pairs[i].mean[j]))
+ pow2(pairs[i].mean[j]))
);
}
pairs[i].std_dev_diff = sqrt ( n / (n - 1) * (
( pairs[i].ssq_diffs / n )
-
- sqr(pairs[i].mean_diff )
+ pow2(pairs[i].mean_diff )
) );
}
}
{
gs->n+=weight;
gs->sum+=weight * val->f;
- gs->ssq+=weight * sqr(val->f);
+ gs->ssq+=weight * pow2(val->f);
}
}
int vert; /* Whether to print the varname vertically. */
};
-/* DESCRIPTIVES private data. Note that the DESCRIPTIVES procedure also
- has a transformation, descriptives_trns. */
-enum
- {
- /* As these are used as bit indexes, there must be 32 or fewer.
- Be very careful in adjusting these, see the structure below
- and the table in descriptives.q. */
- dsc_mean = 0, dsc_semean, dsc_stddev, dsc_variance, dsc_kurt,
- dsc_sekurt, dsc_skew, dsc_seskew, dsc_range, dsc_min,
- dsc_max, dsc_sum, dsc_n_stats
- };
-
-struct descriptives_proc
- {
- /* Miscellaneous. */
- int dup; /* Finds duplicates in list of
- variables. */
- char zname[10]; /* Name for z-score variable. */
-
- /* Counts. */
- double valid, miss; /* Valid, missing--general. */
-
- /* Mean, moments about the mean. */
- double X_bar, M2, M3, M4;
- double min, max;
-
- /* Statistics. */
- double stats[dsc_n_stats]; /* Everything glommed together. */
- };
-
/* GET private data. */
struct get_proc
{
union
{
struct crosstab_proc crs;
- struct descriptives_proc dsc;
struct frequencies_proc frq;
struct list_proc lst;
struct means_proc mns;
#include <unistd.h> /* Required by SunOS4. */
#endif
#include "alloc.h"
+#include "casefile.h"
#include "do-ifP.h"
#include "error.h"
#include "expr.h"
stored, zero otherwise. */
static int compaction_necessary;
-/* Nonzero means that we've overflowed our allotted workspace.
- After that happens once during a session, we always store the
- active file on disk instead of in memory. (This policy may be
- too aggressive.) */
-static int workspace_overflow = 0;
-
/* Time at which vfm was last invoked. */
time_t last_vfm_invocation;
/* Copy all the variables except scratch variables from SRC to
DEST. */
+ /* FIXME: this should be temp_dict not default_dict I guess. */
var_cnt = dict_get_var_cnt (default_dict);
for (i = 0; i < var_cnt; i++)
{
/* Information about storage sink or source. */
struct storage_stream_info
{
- size_t case_cnt; /* Number of cases. */
- size_t case_size; /* Number of bytes in case. */
- enum { DISK, MEMORY } mode; /* Where is data stored? */
-
- /* Disk storage. */
- FILE *file; /* Data file. */
-
- /* Memory storage. */
- int max_cases; /* Maximum cases before switching to disk. */
- struct case_list *head; /* First case in list. */
- struct case_list *tail; /* Last case in list. */
+ struct casefile *casefile; /* Storage. */
};
-static void open_storage_file (struct storage_stream_info *info);
-
/* Initializes a storage sink. */
static void
storage_sink_open (struct case_sink *sink)
struct storage_stream_info *info;
sink->aux = info = xmalloc (sizeof *info);
- info->case_cnt = 0;
- info->case_size = sink->value_cnt * sizeof (union value);
- info->file = NULL;
- info->max_cases = 0;
- info->head = info->tail = NULL;
- if (workspace_overflow)
- {
- info->mode = DISK;
- open_storage_file (info);
- }
- else
- {
- info->mode = MEMORY;
- info->max_cases = (get_max_workspace()
- / (sizeof (struct case_list) + info->case_size));
- }
-}
-
-/* Creates a new temporary file and puts it into INFO. */
-static void
-open_storage_file (struct storage_stream_info *info)
-{
- info->file = tmpfile ();
- if (info->file == NULL)
- {
- msg (ME, _("An error occurred creating a temporary "
- "file for use as the active file: %s."),
- strerror (errno));
- err_failure ();
- }
-}
-
-/* Writes the VALUE_CNT values in VALUES to FILE. */
-static void
-write_storage_file (FILE *file, const union value *values, size_t value_cnt)
-{
- if (fwrite (values, sizeof *values * value_cnt, 1, file) != 1)
- {
- msg (ME, _("An error occurred writing to a "
- "temporary file used as the active file: %s."),
- strerror (errno));
- err_failure ();
- }
-}
-
-/* If INFO represents records in memory, moves them to disk.
- Each comprises VALUE_CNT `union value's. */
-static void
-storage_to_disk (struct storage_stream_info *info, size_t value_cnt)
-{
- struct case_list *cur, *next;
-
- if (info->mode == MEMORY)
- {
- info->mode = DISK;
- open_storage_file (info);
- for (cur = info->head; cur; cur = next)
- {
- next = cur->next;
- write_storage_file (info->file, cur->c.data, value_cnt);
- free (cur);
- }
- info->head = info->tail = NULL;
- }
+ info->casefile = casefile_create (sink->value_cnt * sizeof (union value));
}
/* Destroys storage stream represented by INFO. */
static void
destroy_storage_stream_info (struct storage_stream_info *info)
{
- if (info->mode == DISK)
- {
- if (info->file != NULL)
- fclose (info->file);
- }
- else
- {
- struct case_list *cur, *next;
-
- for (cur = info->head; cur; cur = next)
- {
- next = cur->next;
- free (cur);
- }
- }
+ casefile_destroy (info->casefile);
free (info);
}
{
struct storage_stream_info *info = sink->aux;
- info->case_cnt++;
- if (info->mode == MEMORY)
- {
- struct case_list *new_case;
-
- /* Copy case. */
- new_case = xmalloc (sizeof (struct case_list)
- + ((sink->value_cnt - 1) * sizeof (union value)));
- memcpy (&new_case->c, c, sizeof (union value) * sink->value_cnt);
-
- /* Append case to linked list. */
- new_case->next = NULL;
- if (info->head != NULL)
- info->tail->next = new_case;
- else
- info->head = new_case;
- info->tail = new_case;
-
- /* Dump all the cases to disk if we've run out of
- workspace. */
- if (info->case_cnt > info->max_cases)
- {
- workspace_overflow = 1;
- msg (MW, _("Workspace limit of %d KB (%d cases at %d bytes each) "
- "overflowed. Writing active file to disk."),
- get_max_workspace() / 1024, info->max_cases,
- sizeof (struct case_list) + info->case_size);
-
- storage_to_disk (info, sink->value_cnt);
- }
- }
- else
- write_storage_file (info->file, c->data, sink->value_cnt);
+ casefile_append (info->casefile, c);
}
/* Destroys internal data in SINK. */
static struct case_source *
storage_sink_make_source (struct case_sink *sink)
{
- struct storage_stream_info *info = sink->aux;
-
- if (info->mode == DISK)
- {
- /* Rewind the file. */
- assert (info->file != NULL);
- if (fseek (info->file, 0, SEEK_SET) != 0)
- {
- msg (ME, _("An error occurred while attempting to rewind a "
- "temporary file used as the active file: %s."),
- strerror (errno));
- err_failure ();
- }
- }
-
- return create_case_source (&storage_source_class, sink->dict, info);
+ return create_case_source (&storage_source_class, sink->dict, sink->aux);
}
/* Storage sink. */
{
struct storage_stream_info *info = source->aux;
- return info->case_cnt;
+ return casefile_get_case_cnt (info->casefile);
}
/* Reads all cases from the storage source and passes them one by one to
write_case(). */
static void
storage_source_read (struct case_source *source,
- struct ccase *c,
+ struct ccase *output_case,
write_case_func *write_case, write_case_data wc_data)
{
struct storage_stream_info *info = source->aux;
+ const struct ccase *casefile_case;
+ struct casereader *reader;
- if (info->mode == DISK)
+ reader = casefile_get_reader (info->casefile);
+ while (casereader_read (reader, &casefile_case))
{
- int i;
-
- for (i = 0; i < info->case_cnt; i++)
- {
- if (!fread (c, info->case_size, 1, info->file))
- {
- msg (ME, _("An error occurred while attempting to read from "
- "a temporary file created for the active file: %s."),
- strerror (errno));
- err_failure ();
- break;
- }
-
- if (!write_case (wc_data))
- break;
- }
- }
- else
- {
- while (info->head != NULL)
- {
- struct case_list *iter = info->head;
- memcpy (c, &iter->c, info->case_size);
- if (!write_case (wc_data))
- break;
-
- info->head = iter->next;
- free (iter);
- }
- info->tail = NULL;
+ memcpy (output_case, casefile_case,
+ casefile_get_case_size (info->casefile));
+ write_case (wc_data);
}
+ casereader_destroy (reader);
}
/* Destroys the source's internal data. */
storage_source_destroy,
};
-/* Returns nonzero only if SOURCE is stored on disk (instead of
- in memory). */
-int
-storage_source_on_disk (const struct case_source *source)
+struct casefile *
+storage_source_get_casefile (struct case_source *source)
{
struct storage_stream_info *info = source->aux;
- return info->mode == DISK;
-}
-
-/* Returns the list of cases in storage source SOURCE. */
-struct case_list *
-storage_source_get_cases (const struct case_source *source)
-{
- struct storage_stream_info *info = source->aux;
-
- assert (info->mode == MEMORY);
- return info->head;
-}
-
-/* Sets the list of cases in memory source SOURCE to CASES. */
-void
-storage_source_set_cases (const struct case_source *source,
- struct case_list *cases)
-{
- struct storage_stream_info *info = source->aux;
-
- assert (info->mode == MEMORY);
- info->head = cases;
-}
-
-/* If SOURCE has its cases in memory, writes them to disk. */
-void
-storage_source_to_disk (struct case_source *source)
-{
- struct storage_stream_info *info = source->aux;
-
- storage_to_disk (info, source->value_cnt);
+ assert (source->class == &storage_source_class);
+ return info->casefile;
}
\f
/* Null sink. Used by a few procedures that keep track of output
tab_flags (t, SOMF_NO_TITLE);
tab_submit (t);
}
+\f
+/* Represents auxiliary data for handling SPLIT FILE in a
+ multipass procedure. */
+struct multipass_split_aux_data
+ {
+ struct ccase *prev_case; /* Data in previous case. */
+ struct casefile *casefile; /* Accumulates data for a split. */
+
+ /* Function to call with the accumulated data. */
+ void (*split_func) (const struct casefile *, void *);
+ void *func_aux; /* Auxiliary data. */
+ };
+
+static int multipass_split_callback (struct ccase *c, void *aux_);
+static void multipass_split_output (struct multipass_split_aux_data *);
+
+void
+multipass_procedure_with_splits (void (*split_func) (const struct casefile *,
+ void *),
+ void *func_aux)
+{
+ struct multipass_split_aux_data aux;
+
+ assert (split_func != NULL);
+
+ aux.prev_case = xmalloc (dict_get_case_size (default_dict));
+ aux.casefile = NULL;
+ aux.split_func = split_func;
+ aux.func_aux = func_aux;
+
+ procedure (multipass_split_callback, &aux);
+
+ if (aux.casefile != NULL)
+ multipass_split_output (&aux);
+ free (aux.prev_case);
+}
+
+/* procedure() callback used by multipass_procedure_with_splits(). */
+static int
+multipass_split_callback (struct ccase *c, void *aux_)
+{
+ struct multipass_split_aux_data *aux = aux_;
+
+ /* Start a new series if needed. */
+ if (aux->casefile == NULL || !equal_splits (c, aux->prev_case))
+ {
+ /* Pass any cases to split_func. */
+ if (aux->casefile != NULL)
+ multipass_split_output (aux);
+
+ /* Start a new casefile. */
+ aux->casefile = casefile_create (dict_get_case_size (default_dict));
+
+ /* Record split values. */
+ dump_splits (c);
+ memcpy (aux->prev_case, c, dict_get_case_size (default_dict));
+ }
+
+ casefile_append (aux->casefile, c);
+
+ return 1;
+}
+
+static void
+multipass_split_output (struct multipass_split_aux_data *aux)
+{
+ assert (aux->casefile != NULL);
+ aux->split_func (aux->casefile, aux->func_aux);
+ casefile_destroy (aux->casefile);
+ aux->casefile = NULL;
+}
int case_source_is_class (const struct case_source *,
const struct case_source_class *);
-int storage_source_on_disk (const struct case_source *);
-struct case_list *storage_source_get_cases (const struct case_source *);
-void storage_source_set_cases (const struct case_source *,
- struct case_list *);
-void storage_source_to_disk (struct case_source *source);
+struct casefile *storage_source_get_casefile (struct case_source *);
\f
/* The replacement active file, to which cases are written. */
extern struct case_sink *vfm_sink;
struct case_sink *create_case_sink (const struct case_sink_class *,
const struct dictionary *,
void *);
+void case_sink_open (struct case_sink *);
+void case_sink_write (struct case_sink *, const struct ccase *);
+void case_sink_destroy (struct case_sink *);
void free_case_sink (struct case_sink *);
\f
/* Number of cases to lag. */
void (*end_func) (void *aux),
void *aux);
struct ccase *lagged_case (int n_before);
+\f
+void multipass_procedure_with_splits (void (*) (const struct casefile *,
+ void *),
+ void *aux);
#endif /* !vfm_h */
--- /dev/null
+/* PSPP - computes sample statistics.
+ Copyright (C) 2004 Free Software Foundation, Inc.
+ Written by Ben Pfaff <blp@gnu.org>.
+
+ This program is free software; you can redistribute it and/or
+ modify it under the terms of the GNU General Public License as
+ published by the Free Software Foundation; either version 2 of the
+ License, or (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful, but
+ WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program; if not, write to the Free Software
+ Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA
+ 02111-1307, USA. */
+
+#include <config.h>
+#include "workspace.h"
+#include <assert.h>
+#include <stdlib.h>
+#include "alloc.h"
+#include "settings.h"
+
+static size_t workspace_used;
+
+/* Returns a block SIZE bytes in size, charging it against the
+ workspace limit. Returns a null pointer if the workspace
+ limit is reached. */
+void *
+workspace_malloc (size_t size)
+{
+ if (workspace_used + size > get_max_workspace ())
+ return NULL;
+
+ workspace_used += size;
+ return xmalloc (size);
+}
+
+/* Frees BLOCK, which is SIZE bytes long, and credits it toward
+ the workspace limit. */
+void
+workspace_free (void *block, size_t size)
+{
+ if (block != NULL)
+ {
+ assert (workspace_used >= size);
+ free (block);
+ workspace_used -= size;
+ }
+}
--- /dev/null
+/* PSPP - computes sample statistics.
+ Copyright (C) 2004 Free Software Foundation, Inc.
+ Written by Ben Pfaff <blp@gnu.org>.
+
+ This program is free software; you can redistribute it and/or
+ modify it under the terms of the GNU General Public License as
+ published by the Free Software Foundation; either version 2 of the
+ License, or (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful, but
+ WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program; if not, write to the Free Software
+ Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA
+ 02111-1307, USA. */
+
+#ifndef HEADER_WORKSPACE
+#define HEADER_WORKSPACE
+
+#include <stddef.h>
+
+void *workspace_malloc (size_t);
+void workspace_free (void *, size_t);
+
+#endif /* workspace.h */
+Mon Mar 29 15:25:09 2004 Ben Pfaff <blp@gnu.org>
+
+ * Makefile.am: (TESTS) Add xforms/casefile.sh,
+ stats/descript-basic.sh, stats/descript-missing.sh,
+ stats/moments.sh. Remove command/descriptives.sh.
+
+ * command/descriptives.sh: Removed.
+
+ * command/weight.sh: Fix output (statistic values were wrong!).
+
+ * stats/descript-basic.sh: New test.
+
+ * stats/descript-missing.sh: New test.
+
+ * stats/moments.sh: New test.
+
+ * xforms/casefile.sh: New test.
+
+ * xforms/expressions.sh: Cleans up after itself now.
+
Fri Mar 26 00:55:48 2004 Ben Pfaff <blp@gnu.org>
* Makefile.am: (TESTS) Add xforms/expressions.sh, remove
command/beg-data.sh \
command/bignum.sh \
command/count.sh \
- command/descriptives.sh \
command/erase.sh \
command/file-label.sh \
command/filter.sh \
bugs/t-test.sh \
bugs/temporary.sh \
bugs/val-labs.sh \
- xforms/expressions.sh
+ xforms/casefile.sh \
+ xforms/expressions.sh \
+ stats/descript-basic.sh \
+ stats/descript-missing.sh \
+ stats/moments.sh
noinst_PROGRAMS = gengarbage
+++ /dev/null
-#!/bin/sh
-
-# This program tests that the descriptives command actually works
-
-TEMPDIR=/tmp/pspp-tst-$$
-
-here=`pwd`;
-
-# ensure that top_srcdir is absolute
-cd $top_srcdir; top_srcdir=`pwd`
-
-export STAT_CONFIG_PATH=$top_srcdir/config
-
-
-cleanup()
-{
- rm -rf $TEMPDIR
-}
-
-
-fail()
-{
- echo $activity
- echo FAILED
- cleanup;
- exit 1;
-}
-
-
-no_result()
-{
- echo $activity
- echo NO RESULT;
- cleanup;
- exit 2;
-}
-
-pass()
-{
- cleanup;
- exit 0;
-}
-
-mkdir -p $TEMPDIR
-
-cd $TEMPDIR
-
-activity="create program"
-cat > $TEMPDIR/descript.stat <<EOF
-title 'Test DESCRIPTIVES procedure'.
-
-data list / v0 to v16 1-17.
-begin data.
-12128989012389023
-34128080123890128
-56127781237893217
-78127378123793112
-90913781237892318
-37978547878935789
-52878237892378279
-12377912789378932
-26787654347894348
-29137178947891888
-end data.
-
-descript all/stat=all/format=serial.
-
-EOF
-if [ $? -ne 0 ] ; then no_result ; fi
-
-
-activity="run program"
-$SUPERVISOR $here/../src/pspp -o raw-ascii $TEMPDIR/descript.stat
-if [ $? -ne 0 ] ; then no_result ; fi
-
-activity="compare output"
-diff -B -b $TEMPDIR/pspp.list - <<EOF
-1.1 DATA LIST. Reading 1 record from the command file.
-+--------+------+-------+------+
-|Variable|Record|Columns|Format|
-#========#======#=======#======#
-|V0 | 1| 1- 1|F1.0 |
-|V1 | 1| 2- 2|F1.0 |
-|V2 | 1| 3- 3|F1.0 |
-|V3 | 1| 4- 4|F1.0 |
-|V4 | 1| 5- 5|F1.0 |
-|V5 | 1| 6- 6|F1.0 |
-|V6 | 1| 7- 7|F1.0 |
-|V7 | 1| 8- 8|F1.0 |
-|V8 | 1| 9- 9|F1.0 |
-|V9 | 1| 10- 10|F1.0 |
-|V10 | 1| 11- 11|F1.0 |
-|V11 | 1| 12- 12|F1.0 |
-|V12 | 1| 13- 13|F1.0 |
-|V13 | 1| 14- 14|F1.0 |
-|V14 | 1| 15- 15|F1.0 |
-|V15 | 1| 16- 16|F1.0 |
-|V16 | 1| 17- 17|F1.0 |
-+--------+------+-------+------+
-
-2.1 DESCRIPTIVES. Valid cases = 10; cases with missing value(s) = 0.
-+--------#-------+---------+-----+--------+-------+--------+--------+--------+--------+--------+-----+-------+-------+------+
-|Variable#Valid N|Missing N| Mean|S E Mean|Std Dev|Variance|Kurtosis|S E Kurt|Skewness|S E Skew|Range|Minimum|Maximum| Sum |
-#========#=======#=========#=====#========#=======#========#========#========#========#========#=====#=======#=======#======#
-|V0 # 10| 0|3.800| .841| 2.658| 7.067| -.035| 1.334| .889| .687|8.000| 1.000| 9.000|38.000|
-|V1 # 10| 0|4.600| .957| 3.026| 9.156| -1.386| 1.334| -.032| .687|9.000| .000| 9.000|46.000|
-|V2 # 10| 0|4.100| 1.159| 3.665| 13.433| -2.019| 1.334| .476| .687|8.000| 1.000| 9.000|41.000|
-|V3 # 10| 0|4.100| .875| 2.767| 7.656| -2.049| 1.334| .422| .687|7.000| 1.000| 8.000|41.000|
-|V4 # 10| 0|7.000| .471| 1.491| 2.222| 7.152| 1.334| -2.516| .687|5.000| 3.000| 8.000|70.000|
-|V5 # 10| 0|4.900| 1.027| 3.247| 10.544| -1.401| 1.334| -.205| .687|9.000| .000| 9.000|49.000|
-|V6 # 10| 0|5.900| .795| 2.514| 6.322| -.290| 1.334| -.960| .687|7.000| 1.000| 8.000|59.000|
-|V7 # 10| 0|4.700| 1.096| 3.466| 12.011| -1.993| 1.334| -.165| .687|9.000| .000| 9.000|47.000|
-|V8 # 10| 0|4.100| 1.100| 3.479| 12.100| -1.928| 1.334| .371| .687|9.000| .000| 9.000|41.000|
-|V9 # 10| 0|4.300| .870| 2.751| 7.567| -.875| 1.334| .730| .687|8.000| 1.000| 9.000|43.000|
-|V10 # 10| 0|5.500| .847| 2.677| 7.167| -1.842| 1.334| -.326| .687|7.000| 2.000| 9.000|55.000|
-|V11 # 10| 0|6.500| .778| 2.461| 6.056| -1.276| 1.334| -.895| .687|6.000| 3.000| 9.000|65.000|
-|V12 # 10| 0|7.900| .605| 1.912| 3.656| 5.241| 1.334| -2.208| .687|6.000| 3.000| 9.000|79.000|
-|V13 # 10| 0|4.300| .989| 3.129| 9.789| -1.248| 1.334| .333| .687|9.000| .000| 9.000|43.000|
-|V14 # 10| 0|3.600| 1.013| 3.204| 10.267| -.961| 1.334| .809| .687|9.000| .000| 9.000|36.000|
-|V15 # 10| 0|3.700| .920| 2.908| 8.456| -1.352| 1.334| .710| .687|7.000| 1.000| 8.000|37.000|
-|V16 # 10| 0|6.400| .909| 2.875| 8.267| -1.142| 1.334| -.923| .687|7.000| 2.000| 9.000|64.000|
-+--------#-------+---------+-----+--------+-------+--------+--------+--------+--------+--------+-----+-------+-------+------+
-EOF
-if [ $? -ne 0 ] ; then fail ; fi
-
-
-pass
+--------#-------+---------+------+--------+-------+--------+--------+--------+--------+--------+------+-------+-------+---------+
|Variable#Valid N|Missing N| Mean |S E Mean|Std Dev|Variance|Kurtosis|S E Kurt|Skewness|S E Skew| Range|Minimum|Maximum| Sum |
#========#=======#=========#======#========#=======#========#========#========#========#========#======#=======#=======#=========#
-|AVAR # 730| 0|31.515| .405| 10.937| 119.608|2548.162| .181| 1.345| .090|76.000| 18.000| 94.000|23006.000|
+|AVAR # 730| 0|31.515| .405| 10.937| 119.608| 2.411| .181| 1.345| .090|76.000| 18.000| 94.000|23006.000|
+--------#-------+---------+------+--------+-------+--------+--------+--------+--------+--------+------+-------+-------+---------+
3.1 FREQUENCIES. AVAR:
--- /dev/null
+#!/bin/sh
+
+# This program tests that the descriptives command actually works
+
+TEMPDIR=/tmp/pspp-tst-$$
+
+here=`pwd`;
+
+# ensure that top_srcdir is absolute
+cd $top_srcdir; top_srcdir=`pwd`
+
+export STAT_CONFIG_PATH=$top_srcdir/config
+
+
+cleanup()
+{
+ rm -rf $TEMPDIR
+}
+
+
+fail()
+{
+ echo $activity
+ echo FAILED
+ cleanup;
+ exit 1;
+}
+
+
+no_result()
+{
+ echo $activity
+ echo NO RESULT;
+ cleanup;
+ exit 2;
+}
+
+pass()
+{
+ cleanup;
+ exit 0;
+}
+
+mkdir -p $TEMPDIR
+
+cd $TEMPDIR
+
+activity="create program"
+cat > $TEMPDIR/descript.stat <<EOF
+title 'Test DESCRIPTIVES procedure'.
+
+data list / v0 to v16 1-17.
+begin data.
+12128989012389023
+34128080123890128
+56127781237893217
+78127378123793112
+90913781237892318
+37978547878935789
+52878237892378279
+12377912789378932
+26787654347894348
+29137178947891888
+end data.
+
+descript all/stat=all/format=serial.
+
+EOF
+if [ $? -ne 0 ] ; then no_result ; fi
+
+
+activity="run program"
+$SUPERVISOR $here/../src/pspp -o raw-ascii $TEMPDIR/descript.stat
+if [ $? -ne 0 ] ; then no_result ; fi
+
+activity="compare output"
+diff -B -b $TEMPDIR/pspp.list - <<EOF
+1.1 DATA LIST. Reading 1 record from the command file.
++--------+------+-------+------+
+|Variable|Record|Columns|Format|
+#========#======#=======#======#
+|V0 | 1| 1- 1|F1.0 |
+|V1 | 1| 2- 2|F1.0 |
+|V2 | 1| 3- 3|F1.0 |
+|V3 | 1| 4- 4|F1.0 |
+|V4 | 1| 5- 5|F1.0 |
+|V5 | 1| 6- 6|F1.0 |
+|V6 | 1| 7- 7|F1.0 |
+|V7 | 1| 8- 8|F1.0 |
+|V8 | 1| 9- 9|F1.0 |
+|V9 | 1| 10- 10|F1.0 |
+|V10 | 1| 11- 11|F1.0 |
+|V11 | 1| 12- 12|F1.0 |
+|V12 | 1| 13- 13|F1.0 |
+|V13 | 1| 14- 14|F1.0 |
+|V14 | 1| 15- 15|F1.0 |
+|V15 | 1| 16- 16|F1.0 |
+|V16 | 1| 17- 17|F1.0 |
++--------+------+-------+------+
+
+2.1 DESCRIPTIVES. Valid cases = 10; cases with missing value(s) = 0.
++--------#-------+---------+-----+--------+-------+--------+--------+--------+--------+--------+-----+-------+-------+------+
+|Variable#Valid N|Missing N| Mean|S E Mean|Std Dev|Variance|Kurtosis|S E Kurt|Skewness|S E Skew|Range|Minimum|Maximum| Sum |
+#========#=======#=========#=====#========#=======#========#========#========#========#========#=====#=======#=======#======#
+|V0 # 10| 0|3.800| .841| 2.658| 7.067| -.035| 1.334| .889| .687|8.000| 1.000| 9.000|38.000|
+|V1 # 10| 0|4.600| .957| 3.026| 9.156| -1.386| 1.334| -.032| .687|9.000| .000| 9.000|46.000|
+|V2 # 10| 0|4.100| 1.159| 3.665| 13.433| -2.019| 1.334| .476| .687|8.000| 1.000| 9.000|41.000|
+|V3 # 10| 0|4.100| .875| 2.767| 7.656| -2.049| 1.334| .422| .687|7.000| 1.000| 8.000|41.000|
+|V4 # 10| 0|7.000| .471| 1.491| 2.222| 7.152| 1.334| -2.516| .687|5.000| 3.000| 8.000|70.000|
+|V5 # 10| 0|4.900| 1.027| 3.247| 10.544| -1.401| 1.334| -.205| .687|9.000| .000| 9.000|49.000|
+|V6 # 10| 0|5.900| .795| 2.514| 6.322| -.290| 1.334| -.960| .687|7.000| 1.000| 8.000|59.000|
+|V7 # 10| 0|4.700| 1.096| 3.466| 12.011| -1.993| 1.334| -.165| .687|9.000| .000| 9.000|47.000|
+|V8 # 10| 0|4.100| 1.100| 3.479| 12.100| -1.928| 1.334| .371| .687|9.000| .000| 9.000|41.000|
+|V9 # 10| 0|4.300| .870| 2.751| 7.567| -.875| 1.334| .730| .687|8.000| 1.000| 9.000|43.000|
+|V10 # 10| 0|5.500| .847| 2.677| 7.167| -1.842| 1.334| -.326| .687|7.000| 2.000| 9.000|55.000|
+|V11 # 10| 0|6.500| .778| 2.461| 6.056| -1.276| 1.334| -.895| .687|6.000| 3.000| 9.000|65.000|
+|V12 # 10| 0|7.900| .605| 1.912| 3.656| 5.241| 1.334| -2.208| .687|6.000| 3.000| 9.000|79.000|
+|V13 # 10| 0|4.300| .989| 3.129| 9.789| -1.248| 1.334| .333| .687|9.000| .000| 9.000|43.000|
+|V14 # 10| 0|3.600| 1.013| 3.204| 10.267| -.961| 1.334| .809| .687|9.000| .000| 9.000|36.000|
+|V15 # 10| 0|3.700| .920| 2.908| 8.456| -1.352| 1.334| .710| .687|7.000| 1.000| 8.000|37.000|
+|V16 # 10| 0|6.400| .909| 2.875| 8.267| -1.142| 1.334| -.923| .687|7.000| 2.000| 9.000|64.000|
++--------#-------+---------+-----+--------+-------+--------+--------+--------+--------+--------+-----+-------+-------+------+
+EOF
+if [ $? -ne 0 ] ; then fail ; fi
+
+
+pass
--- /dev/null
+#!/bin/sh
+
+# This program tests that the descriptives command actually works
+
+TEMPDIR=/tmp/pspp-tst-$$
+
+here=`pwd`;
+
+# ensure that top_srcdir is absolute
+cd $top_srcdir; top_srcdir=`pwd`
+
+export STAT_CONFIG_PATH=$top_srcdir/config
+
+
+cleanup()
+{
+ rm -rf $TEMPDIR
+}
+
+
+fail()
+{
+ echo $activity
+ echo FAILED
+ cleanup;
+ exit 1;
+}
+
+
+no_result()
+{
+ echo $activity
+ echo NO RESULT;
+ cleanup;
+ exit 2;
+}
+
+pass()
+{
+ cleanup;
+ exit 0;
+}
+
+mkdir -p $TEMPDIR
+
+cd $TEMPDIR
+
+activity="create program"
+cat > $TEMPDIR/descript.stat <<EOF
+title 'Test DESCRIPTIVES procedure'.
+
+data list / v1 to v3 1-3.
+mis val v1 to v3 (1).
+begin data.
+111
+
+ 1
+1 1
+112
+123
+234
+end data.
+
+descript all/stat=all/format=serial.
+descript all/stat=all/format=serial/missing=include.
+descript all/stat=all/format=serial/missing=listwise.
+descript all/stat=all/format=serial/missing=listwise include.
+
+EOF
+if [ $? -ne 0 ] ; then no_result ; fi
+
+
+activity="run program"
+$SUPERVISOR $here/../src/pspp -o raw-ascii $TEMPDIR/descript.stat
+if [ $? -ne 0 ] ; then no_result ; fi
+
+activity="compare output"
+diff -B -b $TEMPDIR/pspp.list - <<EOF
+1.1 DATA LIST. Reading 1 record from the command file.
++--------+------+-------+------+
+|Variable|Record|Columns|Format|
+#========#======#=======#======#
+|V1 | 1| 1- 1|F1.0 |
+|V2 | 1| 2- 2|F1.0 |
+|V3 | 1| 3- 3|F1.0 |
++--------+------+-------+------+
+
+2.1 DESCRIPTIVES. Valid cases = 7; cases with missing value(s) = 6.
++--------#-------+---------+-----+--------+-------+--------+--------+--------+--------+--------+-----+-------+-------+-----+
+|Variable#Valid N|Missing N| Mean|S E Mean|Std Dev|Variance|Kurtosis|S E Kurt|Skewness|S E Skew|Range|Minimum|Maximum| Sum |
+#========#=======#=========#=====#========#=======#========#========#========#========#========#=====#=======#=======#=====#
+|V1 # 1| 6|2.000| . | . | . | . | . | . | . | .000| 2.000| 2.000|2.000|
+|V2 # 2| 5|2.500| .500| .707| .500| . | . | . | . |1.000| 2.000| 3.000|5.000|
+|V3 # 3| 4|3.000| .577| 1.000| 1.000| . | . | .000| 1.225|2.000| 2.000| 4.000|9.000|
++--------#-------+---------+-----+--------+-------+--------+--------+--------+--------+--------+-----+-------+-------+-----+
+
+3.1 DESCRIPTIVES. Valid cases = 7; cases with missing value(s) = 3.
++--------#-------+---------+-----+--------+-------+--------+--------+--------+--------+--------+-----+-------+-------+------+
+|Variable#Valid N|Missing N| Mean|S E Mean|Std Dev|Variance|Kurtosis|S E Kurt|Skewness|S E Skew|Range|Minimum|Maximum| Sum |
+#========#=======#=========#=====#========#=======#========#========#========#========#========#=====#=======#=======#======#
+|V1 # 5| 2|1.200| .200| .447| .200| 5.000| 2.000| 2.236| .913|1.000| 1.000| 2.000| 6.000|
+|V2 # 5| 2|1.600| .400| .894| .800| .312| 2.000| 1.258| .913|2.000| 1.000| 3.000| 8.000|
+|V3 # 5| 2|2.200| .583| 1.304| 1.700| -1.488| 2.000| .541| .913|3.000| 1.000| 4.000|11.000|
++--------#-------+---------+-----+--------+-------+--------+--------+--------+--------+--------+-----+-------+-------+------+
+
+4.1 DESCRIPTIVES. Valid cases = 1; cases with missing value(s) = 6.
++--------#-------+---------+-----+--------+-------+--------+--------+--------+--------+--------+-----+-------+-------+-----+
+|Variable#Valid N|Missing N| Mean|S E Mean|Std Dev|Variance|Kurtosis|S E Kurt|Skewness|S E Skew|Range|Minimum|Maximum| Sum |
+#========#=======#=========#=====#========#=======#========#========#========#========#========#=====#=======#=======#=====#
+|V1 # 1| 0|2.000| . | . | . | . | . | . | . | .000| 2.000| 2.000|2.000|
+|V2 # 1| 0|3.000| . | . | . | . | . | . | . | .000| 3.000| 3.000|3.000|
+|V3 # 1| 0|4.000| . | . | . | . | . | . | . | .000| 4.000| 4.000|4.000|
++--------#-------+---------+-----+--------+-------+--------+--------+--------+--------+--------+-----+-------+-------+-----+
+
+5.1 DESCRIPTIVES. Valid cases = 4; cases with missing value(s) = 3.
++--------#-------+---------+-----+--------+-------+--------+--------+--------+--------+--------+-----+-------+-------+------+
+|Variable#Valid N|Missing N| Mean|S E Mean|Std Dev|Variance|Kurtosis|S E Kurt|Skewness|S E Skew|Range|Minimum|Maximum| Sum |
+#========#=======#=========#=====#========#=======#========#========#========#========#========#=====#=======#=======#======#
+|V1 # 4| 0|1.250| .250| .500| .250| 4.000| 2.619| 2.000| 1.014|1.000| 1.000| 2.000| 5.000|
+|V2 # 4| 0|1.750| .479| .957| .917| -1.289| 2.619| .855| 1.014|2.000| 1.000| 3.000| 7.000|
+|V3 # 4| 0|2.500| .645| 1.291| 1.667| -1.200| 2.619| .000| 1.014|3.000| 1.000| 4.000|10.000|
++--------#-------+---------+-----+--------+-------+--------+--------+--------+--------+--------+-----+-------+-------+------+
+EOF
+if [ $? -ne 0 ] ; then fail ; fi
+
+
+pass
--- /dev/null
+#! /bin/sh
+
+# Tests calculation of moments.
+
+TEMPDIR=/tmp/pspp-tst-$$
+
+here=`pwd`;
+
+# ensure that top_srcdir is absolute
+cd $top_srcdir; top_srcdir=`pwd`
+
+export STAT_CONFIG_PATH=$top_srcdir/config
+
+
+cleanup()
+{
+ rm -rf $TEMPDIR
+ :
+}
+
+
+fail()
+{
+ echo $activity
+ echo FAILED
+ cleanup;
+ exit 1;
+}
+
+
+no_result()
+{
+ echo $activity
+ echo NO RESULT;
+ cleanup;
+ exit 2;
+}
+
+pass()
+{
+ cleanup;
+ exit 0;
+}
+
+mkdir -p $TEMPDIR
+
+cd $TEMPDIR
+activity="create one-pass moments list"
+sed -ne 's/#.*//;/^[ ]*$/!p' > $TEMPDIR/moments-list-1p <<'EOF'
+# Both the one-pass and two-pass algorithms should be
+# able to cope properly with these.
+1 2 3 4 => W=4.000 M1=2.500 M2=1.667 M3=0.000 M4=-1.200
+1*5 2*5 3*5 4*5 => W=20.000 M1=2.500 M2=1.316 M3=0.000 M4=-1.401
+1*1 2*2 3*3 4*4 => W=10.000 M1=3.000 M2=1.111 M3=-0.712 M4=-0.450
+1*0 => W=0.000 M1=sysmis M2=sysmis M3=sysmis M4=sysmis
+1*1 => W=1.000 M1=1.000 M2=sysmis M3=sysmis M4=sysmis
+1*2 => W=2.000 M1=1.000 M2=0.000 M3=sysmis M4=sysmis
+1*3 => W=3.000 M1=1.000 M2=0.000 M3=sysmis M4=sysmis
+1*2 3 => W=3.000 M1=1.667 M2=1.333 M3=1.732 M4=sysmis
+1 1.00000001 => W=2.000 M1=1.000 M2=0.000 M3=sysmis M4=sysmis
+EOF
+if [ $? -ne 0 ] ; then no_result ; fi
+
+cp $TEMPDIR/moments-list-1p $TEMPDIR/moments-list-2p
+sed -ne 's/#.*//;/^[ ]*$/!p' >> $TEMPDIR/moments-list-2p <<'EOF'
+# Only the two-pass algorithm can be expected to produce
+# proper third and fourth moments here.
+1000001 1000002 1000003 1000004 => W=4.000 M1=1000002.500 M2=1.667 M3=0.000 M4=-1.200
+EOF
+
+activity="create two-pass input file"
+sed < $TEMPDIR/moments-list-2p >> $TEMPDIR/moments-2p.stat \
+ -e 's#^\(.*\) => \(.*\)$#DEBUG MOMENTS/\1.#'
+if [ $? -ne 0 ] ; then no_result ; fi
+
+activity="run program"
+$SUPERVISOR $here/../src/pspp --testing-mode -o raw-ascii \
+ $TEMPDIR/moments-2p.stat >$TEMPDIR/moments-2p.err 2> $TEMPDIR/moments-2p.out
+
+activity="compare output"
+diff -B -b $TEMPDIR/moments-list-2p $TEMPDIR/moments-2p.out
+if [ $? -ne 0 ] ; then fail ; fi
+
+activity="create input file"
+sed < $TEMPDIR/moments-list-1p >> $TEMPDIR/moments-1p.stat \
+ -e 's#^\(.*\) => \(.*\)$#DEBUG MOMENTS ONEPASS/\1.#'
+if [ $? -ne 0 ] ; then no_result ; fi
+
+activity="run program"
+$SUPERVISOR $here/../src/pspp --testing-mode -o raw-ascii \
+ $TEMPDIR/moments-1p.stat >$TEMPDIR/moments-1p.err 2> $TEMPDIR/moments-1p.out
+
+activity="compare output"
+diff -B -b $TEMPDIR/moments-list-1p $TEMPDIR/moments-1p.out
+if [ $? -ne 0 ] ; then fail ; fi
+
+pass
--- /dev/null
+#!/bin/sh
+
+# This program tests casefiles by running DEBUG CASEFILE.
+
+TEMPDIR=/tmp/pspp-tst-$$
+
+here=`pwd`;
+
+# ensure that top_srcdir is absolute
+cd $top_srcdir; top_srcdir=`pwd`
+
+export STAT_CONFIG_PATH=$top_srcdir/config
+
+
+cleanup()
+{
+ rm -rf $TEMPDIR
+}
+
+
+fail()
+{
+ echo $activity
+ echo FAILED
+ cleanup;
+ exit 1;
+}
+
+
+no_result()
+{
+ echo $activity
+ echo NO RESULT;
+ cleanup;
+ exit 2;
+}
+
+pass()
+{
+ cleanup;
+ exit 0;
+}
+
+mkdir -p $TEMPDIR
+
+cd $TEMPDIR
+
+activity="create program"
+cat > $TEMPDIR/casefile.stat <<EOF
+DEBUG CASEFILE SMALL.
+EOF
+if [ $? -ne 0 ] ; then no_result ; fi
+
+activity="run program"
+$SUPERVISOR $here/../src/pspp $TEMPDIR/casefile.stat > $TEMPDIR/casefile.out
+if [ $? -ne 0 ] ; then no_result ; fi
+
+activity="compare results"
+diff -b -B $TEMPDIR/casefile.out - <<EOF
+Casefile tests succeeded.
+EOF
+
+if [ $? -ne 0 ] ; then no_result ; fi
+
+
+if [ $? -ne 0 ] ; then fail ; fi
+
+
+
+pass;
cleanup()
{
- #rm -rf $TEMPDIR
+ rm -rf $TEMPDIR
:
}