--- /dev/null
+/* PSPP - computes sample statistics.
+ Copyright (C) 2007 Free Software Foundation, Inc.
+
+ This program is free software; you can redistribute it and/or
+ modify it under the terms of the GNU General Public License as
+ published by the Free Software Foundation; either version 2 of the
+ License, or (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful, but
+ WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program; if not, write to the Free Software
+ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
+ 02110-1301, USA. */
+
+#include <config.h>
+
+#include <data/sparse-cases.h>
+
+#include <stdlib.h>
+#include <string.h>
+
+#include <data/settings.h>
+#include <data/case-tmpfile.h>
+#include <libpspp/assertion.h>
+#include <libpspp/range-set.h>
+#include <libpspp/sparse-array.h>
+
+#include "xalloc.h"
+
+/* A sparse array of cases. */
+struct sparse_cases
+ {
+ size_t column_cnt; /* Number of values per case. */
+ union value *default_columns; /* Defaults for unwritten cases. */
+ casenumber max_memory_cases; /* Max cases before dumping to disk. */
+ struct sparse_array *memory; /* Backing, if stored in memory. */
+ struct case_tmpfile *disk; /* Backing, if stored on disk. */
+ struct range_set *disk_cases; /* Allocated cases, if on disk. */
+ };
+
+/* Creates and returns a new sparse array of cases with
+ COLUMN_CNT values per case. */
+struct sparse_cases *
+sparse_cases_create (size_t column_cnt)
+{
+ struct sparse_cases *sc = xmalloc (sizeof *sc);
+ sc->column_cnt = column_cnt;
+ sc->default_columns = NULL;
+ sc->max_memory_cases = get_workspace_cases (column_cnt);
+ sc->memory = sparse_array_create (sizeof (struct ccase));
+ sc->disk = NULL;
+ sc->disk_cases = NULL;
+ return sc;
+}
+
+/* Creates and returns a new sparse array of cases that contains
+ the same data as OLD. */
+struct sparse_cases *
+sparse_cases_clone (const struct sparse_cases *old)
+{
+ struct sparse_cases *new = xmalloc (sizeof *new);
+
+ new->column_cnt = old->column_cnt;
+
+ if (old->default_columns != NULL)
+ new->default_columns
+ = xmemdup (old->default_columns,
+ old->column_cnt * sizeof *old->default_columns);
+ else
+ new->default_columns = NULL;
+
+ new->max_memory_cases = old->max_memory_cases;
+
+ if (old->memory != NULL)
+ {
+ unsigned long int idx;
+ struct ccase *c;
+
+ new->memory = sparse_array_create (sizeof (struct ccase));
+ for (c = sparse_array_scan (old->memory, NULL, &idx); c != NULL;
+ c = sparse_array_scan (old->memory, &idx, &idx))
+ case_clone (sparse_array_insert (new->memory, idx), c);
+ }
+ else
+ new->memory = NULL;
+
+ if (old->disk != NULL)
+ {
+ const struct range_set_node *node;
+
+ new->disk = case_tmpfile_create (old->column_cnt);
+ new->disk_cases = range_set_create ();
+ for (node = range_set_first (old->disk_cases); node != NULL;
+ node = range_set_next (old->disk_cases, node))
+ {
+ unsigned long int start = range_set_node_get_start (node);
+ unsigned long int end = range_set_node_get_end (node);
+ unsigned long int idx;
+ struct ccase c;
+
+ for (idx = start; idx < end; idx++)
+ if (!case_tmpfile_get_case (old->disk, idx, &c)
+ || !case_tmpfile_put_case (new->disk, idx, &c))
+ {
+ sparse_cases_destroy (new);
+ return NULL;
+ }
+ }
+ }
+ else
+ {
+ new->disk = NULL;
+ new->disk_cases = NULL;
+ }
+
+ return new;
+}
+
+/* Destroys sparse array of cases SC. */
+void
+sparse_cases_destroy (struct sparse_cases *sc)
+{
+ if (sc != NULL)
+ {
+ if (sc->memory != NULL)
+ {
+ unsigned long int idx;
+ struct ccase *c;
+ for (c = sparse_array_scan (sc->memory, NULL, &idx); c != NULL;
+ c = sparse_array_scan (sc->memory, &idx, &idx))
+ case_destroy (c);
+ sparse_array_destroy (sc->memory);
+ }
+ free (sc->default_columns);
+ case_tmpfile_destroy (sc->disk);
+ range_set_destroy (sc->disk_cases);
+ free (sc);
+ }
+}
+
+/* Returns the number of `union value's in each case in SC. */
+size_t
+sparse_cases_get_value_cnt (const struct sparse_cases *sc)
+{
+ return sc->column_cnt;
+}
+
+/* Dumps the cases in SC, which must currently be stored in
+ memory, to disk. Returns true if successful, false on I/O
+ error. */
+static bool
+dump_sparse_cases_to_disk (struct sparse_cases *sc)
+{
+ unsigned long int idx;
+ struct ccase *c;
+
+ assert (sc->memory != NULL);
+ assert (sc->disk == NULL);
+
+ sc->disk = case_tmpfile_create (sc->column_cnt);
+ sc->disk_cases = range_set_create ();
+
+ for (c = sparse_array_scan (sc->memory, NULL, &idx); c != NULL;
+ c = sparse_array_scan (sc->memory, &idx, &idx))
+ {
+ if (!case_tmpfile_put_case (sc->disk, idx, c))
+ {
+ case_tmpfile_destroy (sc->disk);
+ sc->disk = NULL;
+ range_set_destroy (sc->disk_cases);
+ sc->disk_cases = NULL;
+ return false;
+ }
+ range_set_insert (sc->disk_cases, idx, 1);
+ }
+ sparse_array_destroy (sc->memory);
+ sc->memory = NULL;
+ return true;
+}
+
+/* Returns true if any data has ever been written to ROW in SC,
+ false otherwise. */
+bool
+sparse_cases_contains_row (const struct sparse_cases *sc, casenumber row)
+{
+ return (sc->memory != NULL
+ ? sparse_array_get (sc->memory, row) != NULL
+ : range_set_contains (sc->disk_cases, row));
+}
+
+/* Reads columns COLUMNS...(COLUMNS + VALUE_CNT), exclusive, in
+ the given ROW in SC, into the VALUE_CNT values in VALUES.
+ Returns true if successful, false on I/O error. */
+bool
+sparse_cases_read (struct sparse_cases *sc, casenumber row, size_t column,
+ union value values[], size_t value_cnt)
+{
+ assert (value_cnt <= sc->column_cnt);
+ assert (column + value_cnt <= sc->column_cnt);
+
+ if (sparse_cases_contains_row (sc, row))
+ {
+ struct ccase c;
+ if (sc->memory != NULL)
+ case_clone (&c, sparse_array_get (sc->memory, row));
+ else if (!case_tmpfile_get_case (sc->disk, row, &c))
+ return false;
+ case_copy_out (&c, column, values, value_cnt);
+ case_destroy (&c);
+ }
+ else
+ {
+ assert (sc->default_columns != NULL);
+ memcpy (values, sc->default_columns + column,
+ sizeof *values * value_cnt);
+ }
+
+ return true;
+}
+
+/* Implements sparse_cases_write for an on-disk sparse_cases. */
+static bool
+write_disk_case (struct sparse_cases *sc, casenumber row, size_t column,
+ const union value values[], size_t value_cnt)
+{
+ struct ccase c;
+ bool ok;
+
+ /* Get current case data. */
+ if (column == 0 && value_cnt == sc->column_cnt)
+ case_create (&c, sc->column_cnt);
+ else if (!case_tmpfile_get_case (sc->disk, row, &c))
+ return false;
+
+ /* Copy in new data. */
+ case_copy_in (&c, column, values, value_cnt);
+
+ /* Write new case. */
+ ok = case_tmpfile_put_case (sc->disk, row, &c);
+ if (ok)
+ range_set_insert (sc->disk_cases, row, 1);
+
+ return ok;
+}
+
+/* Writes the VALUE_CNT values in VALUES into columns
+ COLUMNS...(COLUMNS + VALUE_CNT), exclusive, in the given ROW
+ in SC.
+ Returns true if successful, false on I/O error. */
+bool
+sparse_cases_write (struct sparse_cases *sc, casenumber row, size_t column,
+ const union value values[], size_t value_cnt)
+{
+ if (sc->memory != NULL)
+ {
+ struct ccase *c = sparse_array_get (sc->memory, row);
+ if (c == NULL)
+ {
+ if (sparse_array_count (sc->memory) >= sc->max_memory_cases)
+ {
+ if (!dump_sparse_cases_to_disk (sc))
+ return false;
+ return write_disk_case (sc, row, column, values, value_cnt);
+ }
+
+ c = sparse_array_insert (sc->memory, row);
+ case_create (c, sc->column_cnt);
+ if (sc->default_columns != NULL
+ && (column != 0 || value_cnt != sc->column_cnt))
+ case_copy_in (c, 0, sc->default_columns, sc->column_cnt);
+ }
+ case_copy_in (c, column, values, value_cnt);
+ return true;
+ }
+ else
+ return write_disk_case (sc, row, column, values, value_cnt);
+}
+
+/* Writes the VALUE_CNT values in VALUES to columns
+ START_COLUMN...(START_COLUMN + VALUE_CNT), exclusive, in every
+ row in SC, even those rows that have not yet been written.
+ Returns true if successful, false on I/O error.
+
+ The runtime of this function is linear in the number of rows
+ in SC that have already been written. */
+bool
+sparse_cases_write_columns (struct sparse_cases *sc, size_t start_column,
+ const union value values[], size_t value_cnt)
+{
+ assert (value_cnt <= sc->column_cnt);
+ assert (start_column + value_cnt <= sc->column_cnt);
+
+ /* Set defaults. */
+ if (sc->default_columns == NULL)
+ sc->default_columns = xnmalloc (sc->column_cnt,
+ sizeof *sc->default_columns);
+ memcpy (sc->default_columns + start_column, values,
+ value_cnt * sizeof *sc->default_columns);
+
+ /* Set individual rows. */
+ if (sc->memory != NULL)
+ {
+ struct ccase *c;
+ unsigned long int idx;
+
+ for (c = sparse_array_scan (sc->memory, NULL, &idx); c != NULL;
+ c = sparse_array_scan (sc->memory, &idx, &idx))
+ case_copy_in (c, start_column, values, value_cnt);
+ }
+ else
+ {
+ const struct range_set_node *node;
+
+ for (node = range_set_first (sc->disk_cases); node != NULL;
+ node = range_set_next (sc->disk_cases, node))
+ {
+ unsigned long int start = range_set_node_get_start (node);
+ unsigned long int end = range_set_node_get_end (node);
+ unsigned long int row;
+
+ for (row = start; row < end; row++)
+ case_tmpfile_put_values (sc->disk, row,
+ start_column, values, value_cnt);
+ }
+
+ if (case_tmpfile_error (sc->disk))
+ return false;
+ }
+ return true;
+}
--- /dev/null
+/* PSPP - computes sample statistics.
+ Copyright (C) 2007 Free Software Foundation, Inc.
+
+ This program is free software; you can redistribute it and/or
+ modify it under the terms of the GNU General Public License as
+ published by the Free Software Foundation; either version 2 of the
+ License, or (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful, but
+ WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program; if not, write to the Free Software
+ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
+ 02110-1301, USA. */
+
+/* Sparse array of cases.
+
+ Implements a 2-d sparse array in which each row represents a
+ case, each column represents a variable, and each intersection
+ contains a `union value'. Data in the array may be accessed
+ randomly by column and row. When the number of cases stored
+ in the array is small, the data is stored in memory in memory;
+ when it is large, the data is stored in a temporary file.
+
+ The sparse_cases_write_columns function provides a somewhat
+ unusual ability: to write a given value to every row in a
+ column or set of columns. This overwrites any values
+ previously written into those columns. For rows that have
+ never been written, this function sets "default" values that
+ later writes can override.
+
+ The array keeps track of which row have been written. If
+ sparse_cases_write_columns has been used, reading from a row
+ that has never been written yields the default values;
+ otherwise, reading from such a row in an error. It is
+ permissible to write to only some columns in a row and leave
+ the rest of the row's data undefined (or, if
+ sparse_cases_write_columns has been used, at the default
+ values). The array does not keep track of which columns in a
+ row have never been written, but reading values that have
+ never been written or set as defaults yields undefined
+ behavior. */
+
+#ifndef DATA_SPARSE_CASES_H
+#define DATA_SPARSE_CASES_H 1
+
+#include <stddef.h>
+#include <stdbool.h>
+#include <data/case.h>
+
+struct sparse_cases *sparse_cases_create (size_t value_cnt);
+struct sparse_cases *sparse_cases_clone (const struct sparse_cases *);
+void sparse_cases_destroy (struct sparse_cases *);
+
+size_t sparse_cases_get_value_cnt (const struct sparse_cases *);
+
+bool sparse_cases_contains_row (const struct sparse_cases *, casenumber row);
+bool sparse_cases_read (struct sparse_cases *, casenumber row, size_t column,
+ union value[], size_t value_cnt);
+bool sparse_cases_write (struct sparse_cases *, casenumber row, size_t column,
+ const union value[], size_t value_cnt);
+bool sparse_cases_write_columns (struct sparse_cases *, size_t start_column,
+ const union value[], size_t value_cnt);
+
+#endif /* data/sparse-cases.h */