You should have received a copy of the GNU General Public License
along with this program; if not, write to the Free Software
- Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA
- 02111-1307, USA. */
+ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
+ 02110-1301, USA. */
#include <config.h>
#include "casefile.h"
#include "alloc.h"
#include "case.h"
#include "error.h"
+#include "full-read.h"
+#include "full-write.h"
#include "misc.h"
+#include "mkfile.h"
#include "settings.h"
#include "var.h"
-#ifdef HAVE_VALGRIND_VALGRIND_H
-#include <valgrind/valgrind.h>
-#endif
+#include "gettext.h"
+#define _(msgid) gettext (msgid)
-#define IO_BUF_SIZE 8192
+#define IO_BUF_SIZE (8192 / sizeof (union value))
-/* A casefile is a sequentially accessible array of immutable
- cases. It may be stored in memory or on disk as workspace
- allows. Cases may be appended to the end of the file. Cases
- may be read sequentially starting from the beginning of the
- file. Once any cases have been read, no more cases may be
- appended. The entire file is discarded at once. */
+/* A casefile represents a sequentially accessible stream of
+ immutable cases.
+
+ If workspace allows, a casefile is maintained in memory. If
+ workspace overflows, then the casefile is pushed to disk. In
+ either case the interface presented to callers is kept the
+ same.
+
+ The life cycle of a casefile consists of up to three phases:
+
+ 1. Writing. The casefile initially contains no cases. In
+ this phase, any number of cases may be appended to the
+ end of a casefile. (Cases are never inserted in the
+ middle or before the beginning of a casefile.)
+
+ Use casefile_append() or casefile_append_xfer() to
+ append a case to a casefile.
+
+ 2. Reading. The casefile may be read sequentially,
+ starting from the beginning, by "casereaders". Any
+ number of casereaders may be created, at any time,
+ during the reading phase. Each casereader has an
+ independent position in the casefile.
+
+ Casereaders may only move forward. They cannot move
+ backward to arbitrary records or seek randomly.
+ Cloning casereaders would be possible, but it is not yet
+ implemented.
+
+ Use casefile_get_reader() to create a casereader for
+ use in phase 2. This also transitions from phase 1 to
+ phase 2. Calling casefile_mode_reader() makes the same
+ transition, without creating a casereader.
+
+ Use casereader_read(), casereader_read_xfer(), or
+ casereader_read_xfer_assert() to read a case from a
+ casereader. Use casereader_destroy() to discard a
+ casereader when it is no longer needed.
+
+ 3. Destruction. This phase is optional. The casefile is
+ also read with casereaders in this phase, but the
+ ability to create new casereaders is curtailed.
+
+ In this phase, casereaders could still be cloned (once
+ we eventually implement cloning).
+
+ To transition from phase 1 or 2 to phase 3 and create a
+ casereader, call casefile_get_destructive_reader().
+ The same functions apply to the casereader obtained
+ this way as apply to casereaders obtained in phase 2.
+
+ After casefile_get_destructive_reader() is called, no
+ more casereaders may be created with
+ casefile_get_reader() or
+ casefile_get_destructive_reader(). (If cloning of
+ casereaders were implemented, it would still be
+ possible.)
+
+ The purpose of the limitations applied to casereaders
+ in phase 3 is to allow in-memory casefiles to fully
+ transfer ownership of cases to the casereaders,
+ avoiding the need for extra copies of case data. For
+ relatively static data sets with many variables, I
+ suspect (without evidence) that this may be a big
+ performance boost.
+
+ When a casefile is no longer needed, it may be destroyed with
+ casefile_destroy(). This function will also destroy any
+ remaining casereaders. */
/* In-memory cases are arranged in an array of arrays. The top
level is variable size and the size of each bottom level array
/* Basic data. */
struct casefile *next, *prev; /* Next, prev in global list. */
size_t value_cnt; /* Case size in `union value's. */
- size_t case_size; /* Case size in bytes. */
size_t case_acct_size; /* Case size for accounting. */
unsigned long case_cnt; /* Number of cases stored. */
enum { MEMORY, DISK } storage; /* Where cases are stored. */
/* Disk storage. */
int fd; /* File descriptor, -1 if none. */
char *filename; /* Filename. */
- unsigned char *buffer; /* I/O buffer, NULL if none. */
- size_t buffer_used; /* Number of bytes used in buffer. */
- size_t buffer_size; /* Buffer size in bytes. */
+ union value *buffer; /* I/O buffer, NULL if none. */
+ size_t buffer_used; /* Number of values used in buffer. */
+ size_t buffer_size; /* Buffer size in values. */
};
/* For reading out the cases in a casefile. */
/* Disk storage. */
int fd; /* File descriptor. */
- unsigned char *buffer; /* I/O buffer. */
- size_t buffer_pos; /* Byte offset of buffer position. */
+ union value *buffer; /* I/O buffer. */
+ size_t buffer_pos; /* Offset of buffer position. */
struct ccase c; /* Current case. */
};
+/* Returns the case number stored in reader R, i.e. R->case_idx.
+   NOTE(review): case_idx starts at 0 when a reader is created and
+   is incremented after a case has been read, so this looks like
+   the zero-based index of the next case to be read (equivalently,
+   the number of cases read so far) rather than the index of the
+   case most recently returned -- confirm against callers. */
+unsigned long
+casereader_cnum(const struct casereader *r)
+{
+ return r->case_idx;
+}
+
/* Doubly linked list of all casefiles. */
static struct casefile *casefiles;
static int safe_open (const char *filename, int flags);
static int safe_close (int fd);
-static int full_read (int fd, char *buffer, size_t size);
-static int full_write (int fd, const char *buffer, size_t size);
/* Creates and returns a casefile to store cases of VALUE_CNT
`union value's each. */
cf->next->prev = cf;
casefiles = cf;
cf->value_cnt = value_cnt;
- cf->case_size = case_serial_size (value_cnt);
- cf->case_acct_size = cf->case_size + 4 * sizeof (void *);
+ cf->case_acct_size = (cf->value_cnt + 4) * sizeof *cf->buffer;
cf->case_cnt = 0;
cf->storage = MEMORY;
cf->mode = WRITE;
cf->fd = -1;
cf->filename = NULL;
cf->buffer = NULL;
- cf->buffer_size = ROUND_UP (cf->case_size, IO_BUF_SIZE);
- if (cf->case_size > 0 && cf->buffer_size % cf->case_size > 512)
- cf->buffer_size = cf->case_size;
+ cf->buffer_size = ROUND_UP (cf->value_cnt, IO_BUF_SIZE);
+ if (cf->value_cnt > 0 && cf->buffer_size % cf->value_cnt > 64)
+ cf->buffer_size = cf->value_cnt;
cf->buffer_used = 0;
register_atexit ();
return cf;
casefile_mode_reader (cf);
casefile_to_disk (cf);
+ flush_buffer (cf);
if (cf->fd != -1)
{
static void
write_case_to_disk (struct casefile *cf, const struct ccase *c)
{
- case_serialize (c, cf->buffer + cf->buffer_used, cf->case_size);
- cf->buffer_used += cf->case_size;
- if (cf->buffer_used + cf->case_size > cf->buffer_size)
+ case_to_values (c, cf->buffer + cf->buffer_used, cf->value_cnt); /* Store C at the
+    buffer's current fill position; the buffer is an array of
+    `union value's, so the offset is a value count, not bytes. */
+ cf->buffer_used += cf->value_cnt; /* One case occupies value_cnt values. */
+ if (cf->buffer_used + cf->value_cnt > cf->buffer_size) /* Flush as soon as
+    another whole case would no longer fit in the buffer. */
flush_buffer (cf);
}
{
if (cf->buffer_used > 0)
{
- if (!full_write (cf->fd, cf->buffer, cf->buffer_size))
+ if (!full_write (cf->fd, cf->buffer,
+ cf->buffer_size * sizeof *cf->buffer))
msg (FE, _("Error writing temporary file: %s."), strerror (errno));
cf->buffer_used = 0;
}
}
-/* Creates a temporary file and stores its name in *FILENAME and
- a file descriptor for it in *FD. Returns success. Caller is
- responsible for freeing *FILENAME. */
-static int
-make_temp_file (int *fd, char **filename)
-{
- const char *parent_dir;
-
- assert (filename != NULL);
- assert (fd != NULL);
-
- if (getenv ("TMPDIR") != NULL)
- parent_dir = getenv ("TMPDIR");
- else
- parent_dir = P_tmpdir;
-
- *filename = xmalloc (strlen (parent_dir) + 32);
- sprintf (*filename, "%s%cpsppXXXXXX", parent_dir, DIR_SEPARATOR);
- *fd = mkstemp (*filename);
- if (*fd < 0)
- {
- msg (FE, _("%s: Creating temporary file: %s."),
- *filename, strerror (errno));
- free (*filename);
- *filename = NULL;
- return 0;
- }
- return 1;
-}
/* If CF is currently stored in memory, writes it to disk. Readers, if any,
retain their current positions. */
struct casereader *reader;
assert (cf != NULL);
-
+
if (cf->storage == MEMORY)
{
size_t idx, block_cnt;
cf->storage = DISK;
if (!make_temp_file (&cf->fd, &cf->filename))
err_failure ();
- cf->buffer = xmalloc (cf->buffer_size);
- memset (cf->buffer, 0, cf->buffer_size);
+ cf->buffer = xmalloc (cf->buffer_size * sizeof *cf->buffer);
+ memset (cf->buffer, 0, cf->buffer_size * sizeof *cf->buffer);
case_bytes -= cf->case_cnt * cf->case_acct_size;
for (idx = 0; idx < cf->case_cnt; idx++)
cf->mode = READ;
reader = xmalloc (sizeof *reader);
- reader->cf = cf;
reader->next = cf->readers;
if (cf->readers != NULL)
reader->next->prev = reader;
- reader->prev = NULL;
cf->readers = reader;
+ reader->prev = NULL;
+ reader->cf = cf;
reader->case_idx = 0;
+ reader->destructive = 0;
reader->fd = -1;
reader->buffer = NULL;
reader->buffer_pos = 0;
reader_open_file (struct casereader *reader)
{
struct casefile *cf = reader->cf;
- size_t buffer_case_cnt;
off_t file_ofs;
if (reader->case_idx >= cf->case_cnt)
}
else
{
- reader->buffer = xmalloc (cf->buffer_size);
- memset (reader->buffer, 0, cf->buffer_size);
+ reader->buffer = xmalloc (cf->buffer_size * sizeof *cf->buffer);
+ memset (reader->buffer, 0, cf->buffer_size * sizeof *cf->buffer);
}
- if (cf->case_size != 0)
+ if (cf->value_cnt != 0)
{
- buffer_case_cnt = cf->buffer_size / cf->case_size;
- file_ofs = ((off_t) reader->case_idx
- / buffer_case_cnt * cf->buffer_size);
+ size_t buffer_case_cnt = cf->buffer_size / cf->value_cnt;
+ file_ofs = ((off_t) reader->case_idx / buffer_case_cnt
+ * cf->buffer_size * sizeof *cf->buffer);
reader->buffer_pos = (reader->case_idx % buffer_case_cnt
- * cf->case_size);
+ * cf->value_cnt);
}
else
file_ofs = 0;
msg (FE, _("%s: Seeking temporary file: %s."),
cf->filename, strerror (errno));
- if (cf->case_cnt > 0 && cf->case_size > 0)
+ if (cf->case_cnt > 0 && cf->value_cnt > 0)
fill_buffer (reader);
case_create (&reader->c, cf->value_cnt);
static void
fill_buffer (struct casereader *reader)
{
- int retval = full_read (reader->fd, reader->buffer, reader->cf->buffer_size);
+ int retval = full_read (reader->fd, reader->buffer,
+ reader->cf->buffer_size * sizeof *reader->buffer); /* buffer_size is
+    counted in `union value's, so it is scaled to a byte count
+    for the read.
+    NOTE(review): retval is an int, yet it is later compared
+    with a size_t expression.  If full_read() as declared in
+    "full-read.h" returns an unsigned byte count instead of -1
+    on failure, the `retval < 0' branch below can never be
+    taken and read errors would be reported as an unexpected
+    end of file -- confirm against that header. */
if (retval < 0)
msg (FE, _("%s: Reading temporary file: %s."),
reader->cf->filename, strerror (errno));
- else if (retval != reader->cf->buffer_size)
+ else if (retval != reader->cf->buffer_size * sizeof *reader->buffer) /* Anything
+    short of a complete buffer is treated as a truncated file. */
msg (FE, _("%s: Temporary file ended unexpectedly."),
reader->cf->filename);
}
}
/* Reads a copy of the next case from READER into C.
- Caller is responsible for destroying C. */
+ Caller is responsible for destroying C.
+ Returns true if successful, false at end of file. */
int
casereader_read (struct casereader *reader, struct ccase *c)
{
}
else
{
- if (reader->buffer_pos + reader->cf->case_size > reader->cf->buffer_size)
+ if (reader->buffer_pos + reader->cf->value_cnt > reader->cf->buffer_size)
{
fill_buffer (reader);
reader->buffer_pos = 0;
}
- case_unserialize (&reader->c, reader->buffer + reader->buffer_pos,
- reader->cf->case_size);
- reader->buffer_pos += reader->cf->case_size;
+ case_from_values (&reader->c, reader->buffer + reader->buffer_pos,
+ reader->cf->value_cnt);
+ reader->buffer_pos += reader->cf->value_cnt;
reader->case_idx++;
case_clone (c, &reader->c);
}
/* Reads the next case from READER into C and transfers ownership
- to the caller. Caller is responsible for destroying C. */
+ to the caller. Caller is responsible for destroying C.
+ Returns true if successful, false at end of file. */
int
casereader_read_xfer (struct casereader *reader, struct ccase *c)
{
}
}
+/* Reads the next case from READER into C and transfers ownership
+ to the caller. Caller is responsible for destroying C.
+ Assert-fails at end of file.
+
+ This calls casereader_read_xfer() and checks its result with
+ assert().  The read is done outside the assertion, so it is
+ still performed when assertions are compiled out with NDEBUG;
+ in that configuration reaching end of file goes undetected. */
+void
+casereader_read_xfer_assert (struct casereader *reader, struct ccase *c)
+{
+ bool success = casereader_read_xfer (reader, c);
+ assert (success);
+}
+
/* Destroys READER. */
void
casereader_destroy (struct casereader *reader)
return retval;
}
-/* Calls read(), passing FD, BUFFER, and SIZE, repeating as
- necessary to deal with interrupted calls. */
-static int
-full_read (int fd, char *buffer, size_t size)
-{
- size_t bytes_read = 0;
-
- while (bytes_read < size)
- {
- int retval = read (fd, buffer + bytes_read, size - bytes_read);
- if (retval > 0)
- bytes_read += retval;
- else if (retval == 0)
- return bytes_read;
- else if (errno != EINTR)
- return -1;
- }
-
- return bytes_read;
-}
-
-/* Calls write(), passing FD, BUFFER, and SIZE, repeating as
- necessary to deal with interrupted calls. */
-static int
-full_write (int fd, const char *buffer, size_t size)
-{
- size_t bytes_written = 0;
-
- while (bytes_written < size)
- {
- int retval = write (fd, buffer + bytes_written, size - bytes_written);
- if (retval >= 0)
- bytes_written += retval;
- else if (errno != EINTR)
- return -1;
- }
-
- return bytes_written;
-}
-
/* Registers our exit handler with atexit() if it has not already
been registered. */
static void
}
}
+
+
/* atexit() handler that closes and deletes our temporary
files. */
static void
casefile_destroy (casefiles);
}
\f
+#include <gsl/gsl_rng.h>
#include <stdarg.h>
#include "command.h"
-#include "random.h"
#include "lexer.h"
static void test_casefile (int pattern, size_t value_cnt, size_t case_cnt);
if (token != '.')
return lex_end_of_command ();
- for (pattern = 0; pattern < 5; pattern++)
+ for (pattern = 0; pattern < 6; pattern++)
{
const size_t *size;
static void
test_casefile (int pattern, size_t value_cnt, size_t case_cnt)
{
- int zero = 0;
struct casefile *cf;
struct casereader *r1, *r2;
struct ccase c;
- struct rng *rng;
+ gsl_rng *rng;
size_t i, j;
- rng = rng_create ();
- rng_seed (rng, &zero, sizeof zero);
+ rng = gsl_rng_alloc (gsl_rng_mt19937);
cf = casefile_create (value_cnt);
+ if (pattern == 5)
+ casefile_to_disk (cf);
for (i = 0; i < case_cnt; i++)
write_random_case (cf, i);
+ if (pattern == 5)
+ casefile_sleep (cf);
r1 = casefile_get_reader (cf);
r2 = casefile_get_reader (cf);
switch (pattern)
{
case 0:
+ case 5:
for (i = 0; i < case_cnt; i++)
{
read_and_verify_random_case (cf, r1, i);
for (i = j = 0; i < case_cnt; i++)
{
read_and_verify_random_case (cf, r1, i);
- if (rng_get_int (rng) % pattern == 0)
- read_and_verify_random_case (cf, r2, j++);
+ if (gsl_rng_get (rng) % pattern == 0)
+ read_and_verify_random_case (cf, r2, j++);
if (i == case_cnt / 2)
casefile_to_disk (cf);
}
casereader_destroy (r1);
}
casefile_destroy (cf);
- rng_destroy (rng);
+ gsl_rng_free (rng);
}
static void