You should have received a copy of the GNU General Public License
along with this program; if not, write to the Free Software
- Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA
- 02111-1307, USA. */
+ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
+ 02110-1301, USA. */
#include <config.h>
#include "casefile.h"
#include "alloc.h"
#include "case.h"
#include "error.h"
+#include "full-read.h"
+#include "full-write.h"
#include "misc.h"
+#include "mkfile.h"
#include "settings.h"
#include "var.h"
-#ifdef HAVE_VALGRIND_VALGRIND_H
-#include <valgrind/valgrind.h>
-#endif
+#include "gettext.h"
+#define _(msgid) gettext (msgid)
-#define IO_BUF_SIZE 8192
+#define IO_BUF_SIZE (8192 / sizeof (union value)) /* Number of `union value's that fit in 8192 bytes. */
-/* A casefile is a sequentially accessible array of immutable
- cases. It may be stored in memory or on disk as workspace
- allows. Cases may be appended to the end of the file. Cases
- may be read sequentially starting from the beginning of the
- file. Once any cases have been read, no more cases may be
- appended. The entire file is discarded at once. */
+/* A casefile represents a sequentially accessible stream of
+ immutable cases.
+
+ If workspace allows, a casefile is maintained in memory. If
+ workspace overflows, then the casefile is pushed to disk. In
+ either case the interface presented to callers is kept the
+ same.
+
+ The life cycle of a casefile consists of up to three phases:
+
+ 1. Writing. The casefile initially contains no cases. In
+ this phase, any number of cases may be appended to the
+ end of a casefile. (Cases are never inserted in the
+ middle or before the beginning of a casefile.)
+
+ Use casefile_append() or casefile_append_xfer() to
+ append a case to a casefile.
+
+ 2. Reading. The casefile may be read sequentially,
+ starting from the beginning, by "casereaders". Any
+ number of casereaders may be created, at any time,
+ during the reading phase. Each casereader has an
+ independent position in the casefile.
+
+ Casereaders may only move forward. They cannot move
+ backward to arbitrary records or seek randomly.
+ Cloning casereaders would be possible in principle, but
+ it is not yet implemented.
+
+ Use casefile_get_reader() to create a casereader for
+ use in phase 2. This also transitions from phase 1 to
+ phase 2. Calling casefile_mode_reader() makes the same
+ transition, without creating a casereader.
+
+ Use casereader_read(), casereader_read_xfer(), or
+ casereader_read_xfer_assert() to read a case from a
+ casereader. Use casereader_destroy() to discard a
+ casereader when it is no longer needed.
+
+ 3. Destruction. This phase is optional. The casefile is
+ also read with casereaders in this phase, but the
+ ability to create new casereaders is curtailed.
+
+ In this phase, casereaders could still be cloned (once
+ we eventually implement cloning).
+
+ To transition from phase 1 or 2 to phase 3 and create a
+ casereader, call casefile_get_destructive_reader().
+ The same functions apply to the casereader obtained
+ this way as apply to casereaders obtained in phase 2.
+
+ After casefile_get_destructive_reader() is called, no
+ more casereaders may be created with
+ casefile_get_reader() or
+ casefile_get_destructive_reader(). (If cloning of
+ casereaders were implemented, it would still be
+ possible.)
+
+ The purpose of the limitations applied to casereaders
+ in phase 3 is to allow in-memory casefiles to fully
+ transfer ownership of cases to the casereaders,
+ avoiding the need for extra copies of case data. For
+ relatively static data sets with many variables, I
+ suspect (without evidence) that this may be a big
+ performance boost.
+
+ When a casefile is no longer needed, it may be destroyed with
+ casefile_destroy(). This function will also destroy any
+ remaining casereaders. */
/* In-memory cases are arranged in an array of arrays. The top
level is variable size and the size of each bottom level array
/* Basic data. */
struct casefile *next, *prev; /* Next, prev in global list. */
size_t value_cnt; /* Case size in `union value's. */
- size_t case_size; /* Case size in bytes. */
size_t case_acct_size; /* Case size for accounting. */
unsigned long case_cnt; /* Number of cases stored. */
enum { MEMORY, DISK } storage; /* Where cases are stored. */
/* Disk storage. */
int fd; /* File descriptor, -1 if none. */
char *filename; /* Filename. */
- unsigned char *buffer; /* I/O buffer, NULL if none. */
- size_t buffer_used; /* Number of bytes used in buffer. */
- size_t buffer_size; /* Buffer size in bytes. */
+ union value *buffer; /* I/O buffer, NULL if none. */
+ size_t buffer_used; /* Number of values used in buffer. */
+ size_t buffer_size; /* Buffer size in values. */
};
/* For reading out the cases in a casefile. */
/* Disk storage. */
int fd; /* File descriptor. */
- unsigned char *buffer; /* I/O buffer. */
- size_t buffer_pos; /* Byte offset of buffer position. */
+ union value *buffer; /* I/O buffer. */
+ size_t buffer_pos; /* Offset of buffer position, in values. */
struct ccase c; /* Current case. */
};
+/* Returns R's current case number, taken directly from its
+ case_idx member. */
+unsigned long
+casereader_cnum(const struct casereader *r)
+{
+ return r->case_idx;
+}
+
/* Doubly linked list of all casefiles. */
static struct casefile *casefiles;
static int safe_open (const char *filename, int flags);
static int safe_close (int fd);
-static int full_read (int fd, char *buffer, size_t size);
-static int full_write (int fd, const char *buffer, size_t size);
/* Creates and returns a casefile to store cases of VALUE_CNT
`union value's each. */
cf->next->prev = cf;
casefiles = cf;
cf->value_cnt = value_cnt;
- cf->case_size = case_serial_size (value_cnt);
- cf->case_acct_size = cf->case_size + 4 * sizeof (void *);
+ cf->case_acct_size = (cf->value_cnt + 4) * sizeof *cf->buffer;
cf->case_cnt = 0;
cf->storage = MEMORY;
cf->mode = WRITE;
cf->fd = -1;
cf->filename = NULL;
cf->buffer = NULL;
- cf->buffer_size = ROUND_UP (cf->case_size, IO_BUF_SIZE);
- if (cf->case_size > 0 && cf->buffer_size % cf->case_size > 512)
- cf->buffer_size = cf->case_size;
+ cf->buffer_size = ROUND_UP (cf->value_cnt, IO_BUF_SIZE);
+ if (cf->value_cnt > 0 && cf->buffer_size % cf->value_cnt > 64)
+ cf->buffer_size = cf->value_cnt;
cf->buffer_used = 0;
register_atexit ();
return cf;
casefile_mode_reader (cf);
casefile_to_disk (cf);
+ flush_buffer (cf);
if (cf->fd != -1)
{
/* Try memory first. */
if (cf->storage == MEMORY)
{
- if (case_bytes < get_max_workspace ())
+ if (case_bytes < get_workspace ())
{
size_t block_idx = cf->case_cnt / CASES_PER_BLOCK;
size_t case_idx = cf->case_cnt % CASES_PER_BLOCK;
if ((block_idx & (block_idx - 1)) == 0)
{
size_t block_cap = block_idx == 0 ? 1 : block_idx * 2;
- cf->cases = xrealloc (cf->cases,
- sizeof *cf->cases * block_cap);
+ cf->cases = xnrealloc (cf->cases,
+ block_cap, sizeof *cf->cases);
}
- cf->cases[block_idx] = xmalloc (sizeof **cf->cases
- * CASES_PER_BLOCK);
+ cf->cases[block_idx] = xnmalloc (CASES_PER_BLOCK,
+ sizeof **cf->cases);
}
case_move (&cf->cases[block_idx][case_idx], &new_case);
static void
write_case_to_disk (struct casefile *cf, const struct ccase *c)
{
- case_serialize (c, cf->buffer + cf->buffer_used, cf->case_size);
- cf->buffer_used += cf->case_size;
- if (cf->buffer_used + cf->case_size > cf->buffer_size)
+ case_to_values (c, cf->buffer + cf->buffer_used, cf->value_cnt); /* Destination is the first unused value slot in CF's I/O buffer. */
+ cf->buffer_used += cf->value_cnt; /* buffer_used and buffer_size count values, not bytes. */
+ if (cf->buffer_used + cf->value_cnt > cf->buffer_size) /* Another whole case would not fit, so flush now. */
flush_buffer (cf);
}
{
if (cf->buffer_used > 0)
{
- if (!full_write (cf->fd, cf->buffer, cf->buffer_size))
+ if (!full_write (cf->fd, cf->buffer,
+ cf->buffer_size * sizeof *cf->buffer))
msg (FE, _("Error writing temporary file: %s."), strerror (errno));
cf->buffer_used = 0;
}
}
-/* Creates a temporary file and stores its name in *FILENAME and
- a file descriptor for it in *FD. Returns success. Caller is
- responsible for freeing *FILENAME. */
-static int
-make_temp_file (int *fd, char **filename)
-{
- const char *parent_dir;
-
- assert (filename != NULL);
- assert (fd != NULL);
-
- if (getenv ("TMPDIR") != NULL)
- parent_dir = getenv ("TMPDIR");
- else
- parent_dir = P_tmpdir;
-
- *filename = xmalloc (strlen (parent_dir) + 32);
- sprintf (*filename, "%s%cpsppXXXXXX", parent_dir, DIR_SEPARATOR);
- *fd = mkstemp (*filename);
- if (*fd < 0)
- {
- msg (FE, _("%s: Creating temporary file: %s."),
- *filename, strerror (errno));
- free (*filename);
- *filename = NULL;
- return 0;
- }
- return 1;
-}
/* If CF is currently stored in memory, writes it to disk. Readers, if any,
retain their current positions. */
struct casereader *reader;
assert (cf != NULL);
-
+
if (cf->storage == MEMORY)
{
size_t idx, block_cnt;
cf->storage = DISK;
if (!make_temp_file (&cf->fd, &cf->filename))
err_failure ();
- cf->buffer = xmalloc (cf->buffer_size);
- memset (cf->buffer, 0, cf->buffer_size);
+ cf->buffer = xnmalloc (cf->buffer_size, sizeof *cf->buffer);
+ memset (cf->buffer, 0, cf->buffer_size * sizeof *cf->buffer);
case_bytes -= cf->case_cnt * cf->case_acct_size;
for (idx = 0; idx < cf->case_cnt; idx++)
cf->mode = READ;
reader = xmalloc (sizeof *reader);
- reader->cf = cf;
reader->next = cf->readers;
if (cf->readers != NULL)
reader->next->prev = reader;
- reader->prev = NULL;
cf->readers = reader;
+ reader->prev = NULL;
+ reader->cf = cf;
reader->case_idx = 0;
+ reader->destructive = 0;
reader->fd = -1;
reader->buffer = NULL;
reader->buffer_pos = 0;
reader_open_file (struct casereader *reader)
{
struct casefile *cf = reader->cf;
- size_t buffer_case_cnt;
off_t file_ofs;
if (reader->case_idx >= cf->case_cnt)
}
else
{
- reader->buffer = xmalloc (cf->buffer_size);
- memset (reader->buffer, 0, cf->buffer_size);
+ reader->buffer = xnmalloc (cf->buffer_size, sizeof *cf->buffer);
+ memset (reader->buffer, 0, cf->buffer_size * sizeof *cf->buffer);
}
- if (cf->case_size != 0)
+ if (cf->value_cnt != 0)
{
- buffer_case_cnt = cf->buffer_size / cf->case_size;
- file_ofs = ((off_t) reader->case_idx
- / buffer_case_cnt * cf->buffer_size);
+ size_t buffer_case_cnt = cf->buffer_size / cf->value_cnt;
+ file_ofs = ((off_t) reader->case_idx / buffer_case_cnt
+ * cf->buffer_size * sizeof *cf->buffer);
reader->buffer_pos = (reader->case_idx % buffer_case_cnt
- * cf->case_size);
+ * cf->value_cnt);
}
else
file_ofs = 0;
msg (FE, _("%s: Seeking temporary file: %s."),
cf->filename, strerror (errno));
- if (cf->case_cnt > 0 && cf->case_size > 0)
+ if (cf->case_cnt > 0 && cf->value_cnt > 0)
fill_buffer (reader);
case_create (&reader->c, cf->value_cnt);
static void
fill_buffer (struct casereader *reader)
{
- int retval = full_read (reader->fd, reader->buffer, reader->cf->buffer_size);
+ int retval = full_read (reader->fd, reader->buffer,
+ reader->cf->buffer_size * sizeof *reader->buffer); /* buffer_size is in values; scale it to bytes for the read. */
if (retval < 0)
msg (FE, _("%s: Reading temporary file: %s."),
reader->cf->filename, strerror (errno));
- else if (retval != reader->cf->buffer_size)
+ else if (retval != reader->cf->buffer_size * sizeof *reader->buffer) /* NOTE(review): retval is an int compared against a size_t byte count; if full_read() from full-read.h returns size_t (as gnulib's does), the retval < 0 test above presumably never fires -- TODO confirm. */
msg (FE, _("%s: Temporary file ended unexpectedly."),
reader->cf->filename);
}
}
/* Reads a copy of the next case from READER into C.
- Caller is responsible for destroying C. */
+ Caller is responsible for destroying C.
+ Returns true if successful, false at end of file. */
int
casereader_read (struct casereader *reader, struct ccase *c)
{
}
else
{
- if (reader->buffer_pos + reader->cf->case_size > reader->cf->buffer_size)
+ if (reader->buffer_pos + reader->cf->value_cnt > reader->cf->buffer_size)
{
fill_buffer (reader);
reader->buffer_pos = 0;
}
- case_unserialize (&reader->c, reader->buffer + reader->buffer_pos,
- reader->cf->case_size);
- reader->buffer_pos += reader->cf->case_size;
+ case_from_values (&reader->c, reader->buffer + reader->buffer_pos,
+ reader->cf->value_cnt);
+ reader->buffer_pos += reader->cf->value_cnt;
reader->case_idx++;
case_clone (c, &reader->c);
}
/* Reads the next case from READER into C and transfers ownership
- to the caller. Caller is responsible for destroying C. */
+ to the caller. Caller is responsible for destroying C.
+ Returns true if successful, false at end of file. */
int
casereader_read_xfer (struct casereader *reader, struct ccase *c)
{
}
}
+/* Reads the next case from READER into C and transfers ownership
+ to the caller. Caller is responsible for destroying C.
+ Implemented as a call to casereader_read_xfer() followed by
+ an assert() on its result, so it assert-fails at end of file.
+ The check itself disappears when assert() is disabled with
+ NDEBUG, although the read is still performed. */
+void
+casereader_read_xfer_assert (struct casereader *reader, struct ccase *c)
+{
+ bool success = casereader_read_xfer (reader, c);
+ assert (success);
+}
+
/* Destroys READER. */
void
casereader_destroy (struct casereader *reader)
return retval;
}
-/* Calls read(), passing FD, BUFFER, and SIZE, repeating as
- necessary to deal with interrupted calls. */
-static int
-full_read (int fd, char *buffer, size_t size)
-{
- size_t bytes_read = 0;
-
- while (bytes_read < size)
- {
- int retval = read (fd, buffer + bytes_read, size - bytes_read);
- if (retval > 0)
- bytes_read += retval;
- else if (retval == 0)
- return bytes_read;
- else if (errno != EINTR)
- return -1;
- }
-
- return bytes_read;
-}
-
-/* Calls write(), passing FD, BUFFER, and SIZE, repeating as
- necessary to deal with interrupted calls. */
-static int
-full_write (int fd, const char *buffer, size_t size)
-{
- size_t bytes_written = 0;
-
- while (bytes_written < size)
- {
- int retval = write (fd, buffer + bytes_written, size - bytes_written);
- if (retval >= 0)
- bytes_written += retval;
- else if (errno != EINTR)
- return -1;
- }
-
- return bytes_written;
-}
-
/* Registers our exit handler with atexit() if it has not already
been registered. */
static void
}
}
+
+
/* atexit() handler that closes and deletes our temporary
files. */
static void
while (casefiles != NULL)
casefile_destroy (casefiles);
}
-\f
-#include <stdarg.h>
-#include "command.h"
-#include "random.h"
-#include "lexer.h"
-
-static void test_casefile (int pattern, size_t value_cnt, size_t case_cnt);
-static void get_random_case (struct ccase *, size_t value_cnt,
- size_t case_idx);
-static void write_random_case (struct casefile *cf, size_t case_idx);
-static void read_and_verify_random_case (struct casefile *cf,
- struct casereader *reader,
- size_t case_idx);
-static void fail_test (const char *message, ...);
-
-int
-cmd_debug_casefile (void)
-{
- static const size_t sizes[] =
- {
- 1, 2, 3, 4, 5, 6, 7, 14, 15, 16, 17, 31, 55, 73,
- 100, 137, 257, 521, 1031, 2053
- };
- int size_max;
- int case_max;
- int pattern;
-
- size_max = sizeof sizes / sizeof *sizes;
- if (lex_match_id ("SMALL"))
- {
- size_max -= 4;
- case_max = 511;
- }
- else
- case_max = 4095;
- if (token != '.')
- return lex_end_of_command ();
-
- for (pattern = 0; pattern < 5; pattern++)
- {
- const size_t *size;
-
- for (size = sizes; size < sizes + size_max; size++)
- {
- size_t case_cnt;
-
- for (case_cnt = 0; case_cnt <= case_max;
- case_cnt = (case_cnt * 2) + 1)
- test_casefile (pattern, *size, case_cnt);
- }
- }
- printf ("Casefile tests succeeded.\n");
- return CMD_SUCCESS;
-}
-
-static void
-test_casefile (int pattern, size_t value_cnt, size_t case_cnt)
-{
- int zero = 0;
- struct casefile *cf;
- struct casereader *r1, *r2;
- struct ccase c;
- struct rng *rng;
- size_t i, j;
-
- rng = rng_create ();
- rng_seed (rng, &zero, sizeof zero);
- cf = casefile_create (value_cnt);
- for (i = 0; i < case_cnt; i++)
- write_random_case (cf, i);
- r1 = casefile_get_reader (cf);
- r2 = casefile_get_reader (cf);
- switch (pattern)
- {
- case 0:
- for (i = 0; i < case_cnt; i++)
- {
- read_and_verify_random_case (cf, r1, i);
- read_and_verify_random_case (cf, r2, i);
- }
- break;
- case 1:
- for (i = 0; i < case_cnt; i++)
- read_and_verify_random_case (cf, r1, i);
- for (i = 0; i < case_cnt; i++)
- read_and_verify_random_case (cf, r2, i);
- break;
- case 2:
- case 3:
- case 4:
- for (i = j = 0; i < case_cnt; i++)
- {
- read_and_verify_random_case (cf, r1, i);
- if (rng_get_int (rng) % pattern == 0)
- read_and_verify_random_case (cf, r2, j++);
- if (i == case_cnt / 2)
- casefile_to_disk (cf);
- }
- for (; j < case_cnt; j++)
- read_and_verify_random_case (cf, r2, j);
- break;
- }
- if (casereader_read (r1, &c))
- fail_test ("Casereader 1 not at end of file.");
- if (casereader_read (r2, &c))
- fail_test ("Casereader 2 not at end of file.");
- if (pattern != 1)
- casereader_destroy (r1);
- if (pattern != 2)
- casereader_destroy (r2);
- if (pattern > 2)
- {
- r1 = casefile_get_destructive_reader (cf);
- for (i = 0; i < case_cnt; i++)
- {
- struct ccase read_case, expected_case;
-
- get_random_case (&expected_case, value_cnt, i);
- if (!casereader_read_xfer (r1, &read_case))
- fail_test ("Premature end of casefile.");
- for (j = 0; j < value_cnt; j++)
- {
- double a = case_num (&read_case, j);
- double b = case_num (&expected_case, j);
- if (a != b)
- fail_test ("Case %lu fails comparison.", (unsigned long) i);
- }
- case_destroy (&expected_case);
- case_destroy (&read_case);
- }
- casereader_destroy (r1);
- }
- casefile_destroy (cf);
- rng_destroy (rng);
-}
-
-static void
-get_random_case (struct ccase *c, size_t value_cnt, size_t case_idx)
-{
- int i;
- case_create (c, value_cnt);
- for (i = 0; i < value_cnt; i++)
- case_data_rw (c, i)->f = case_idx % 257 + i;
-}
-
-static void
-write_random_case (struct casefile *cf, size_t case_idx)
-{
- struct ccase c;
- get_random_case (&c, casefile_get_value_cnt (cf), case_idx);
- casefile_append_xfer (cf, &c);
-}
-
-static void
-read_and_verify_random_case (struct casefile *cf,
- struct casereader *reader, size_t case_idx)
-{
- struct ccase read_case, expected_case;
- size_t value_cnt;
- size_t i;
-
- value_cnt = casefile_get_value_cnt (cf);
- get_random_case (&expected_case, value_cnt, case_idx);
- if (!casereader_read (reader, &read_case))
- fail_test ("Premature end of casefile.");
- for (i = 0; i < value_cnt; i++)
- {
- double a = case_num (&read_case, i);
- double b = case_num (&expected_case, i);
- if (a != b)
- fail_test ("Case %lu fails comparison.", (unsigned long) case_idx);
- }
- case_destroy (&read_case);
- case_destroy (&expected_case);
-}
-
-static void
-fail_test (const char *message, ...)
-{
- va_list args;
-
- va_start (args, message);
- vprintf (message, args);
- putchar ('\n');
- va_end (args);
-
- exit (1);
-}