You should have received a copy of the GNU General Public License
along with this program; if not, write to the Free Software
- Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA
- 02111-1307, USA. */
+ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
+ 02110-1301, USA. */
#include <config.h>
#include "casefile.h"
#include "alloc.h"
#include "case.h"
#include "error.h"
+#include "full-read.h"
+#include "full-write.h"
#include "misc.h"
+#include "mkfile.h"
#include "settings.h"
#include "var.h"
-#ifdef HAVE_VALGRIND_VALGRIND_H
-#include <valgrind/valgrind.h>
-#endif
+#include "gettext.h"
+#define _(msgid) gettext (msgid)
#define IO_BUF_SIZE (8192 / sizeof (union value))
-/* A casefile is a sequentially accessible array of immutable
- cases. It may be stored in memory or on disk as workspace
- allows. Cases may be appended to the end of the file. Cases
- may be read sequentially starting from the beginning of the
- file. Once any cases have been read, no more cases may be
- appended. The entire file is discarded at once. */
+/* A casefile represents a sequentially accessible stream of
+ immutable cases.
+
+ If workspace allows, a casefile is maintained in memory. If
+ workspace overflows, then the casefile is pushed to disk. In
+ either case the interface presented to callers is kept the
+ same.
+
+ The life cycle of a casefile consists of up to three phases:
+
+ 1. Writing. The casefile initially contains no cases. In
+ this phase, any number of cases may be appended to the
+ end of a casefile. (Cases are never inserted in the
+ middle or before the beginning of a casefile.)
+
+ Use casefile_append() or casefile_append_xfer() to
+ append a case to a casefile.
+
+ 2. Reading. The casefile may be read sequentially,
+ starting from the beginning, by "casereaders". Any
+ number of casereaders may be created, at any time,
+ during the reading phase. Each casereader has an
+ independent position in the casefile.
+
+ Casereaders may only move forward. They cannot move
+ backward to arbitrary records or seek randomly.
+ Cloning casereaders would be possible, but it is not
+ yet implemented.
+
+ Use casefile_get_reader() to create a casereader for
+ use in phase 2. This also transitions from phase 1 to
+ phase 2. Calling casefile_mode_reader() makes the same
+ transition, without creating a casereader.
+
+ Use casereader_read(), casereader_read_xfer(), or
+ casereader_read_xfer_assert() to read a case from a
+ casereader. Use casereader_destroy() to discard a
+ casereader when it is no longer needed.
+
+ 3. Destruction. This phase is optional. The casefile is
+ also read with casereaders in this phase, but the
+ ability to create new casereaders is curtailed.
+
+ In this phase, casereaders could still be cloned (once
+ we eventually implement cloning).
+
+ To transition from phase 1 or 2 to phase 3 and create a
+ casereader, call casefile_get_destructive_reader().
+ The same functions apply to the casereader obtained
+ this way as apply to casereaders obtained in phase 2.
+
+ After casefile_get_destructive_reader() is called, no
+ more casereaders may be created with
+ casefile_get_reader() or
+ casefile_get_destructive_reader(). (If cloning of
+ casereaders were implemented, cloning an existing
+ casereader would still be possible.)
+
+ The purpose of the limitations applied to casereaders
+ in phase 3 is to allow in-memory casefiles to fully
+ transfer ownership of cases to the casereaders,
+ avoiding the need for extra copies of case data. For
+ relatively static data sets with many variables, I
+ suspect (without evidence) that this may be a big
+ performance boost.
+
+ When a casefile is no longer needed, it may be destroyed with
+ casefile_destroy(). This function will also destroy any
+ remaining casereaders. */
/* In-memory cases are arranged in an array of arrays. The top
level is variable size and the size of each bottom level array
struct ccase c; /* Current case. */
};
+/* Returns the case number of the current case of casereader R,
+   as recorded in R's case_idx member.  R itself is not
+   modified. */
+unsigned long
+casereader_cnum (const struct casereader *r)
+{
+  const unsigned long current_case = r->case_idx;
+
+  return current_case;
+}
+
/* Doubly linked list of all casefiles. */
static struct casefile *casefiles;
static int safe_open (const char *filename, int flags);
static int safe_close (int fd);
-static int full_read (int fd, void *buffer, size_t size);
-static int full_write (int fd, const void *buffer, size_t size);
/* Creates and returns a casefile to store cases of VALUE_CNT
`union value's each. */
casefile_mode_reader (cf);
casefile_to_disk (cf);
+ flush_buffer (cf);
if (cf->fd != -1)
{
}
}
-/* Creates a temporary file and stores its name in *FILENAME and
- a file descriptor for it in *FD. Returns success. Caller is
- responsible for freeing *FILENAME. */
-static int
-make_temp_file (int *fd, char **filename)
-{
- const char *parent_dir;
-
- assert (filename != NULL);
- assert (fd != NULL);
-
- if (getenv ("TMPDIR") != NULL)
- parent_dir = getenv ("TMPDIR");
- else
- parent_dir = P_tmpdir;
-
- *filename = xmalloc (strlen (parent_dir) + 32);
- sprintf (*filename, "%s%cpsppXXXXXX", parent_dir, DIR_SEPARATOR);
- *fd = mkstemp (*filename);
- if (*fd < 0)
- {
- msg (FE, _("%s: Creating temporary file: %s."),
- *filename, strerror (errno));
- free (*filename);
- *filename = NULL;
- return 0;
- }
- return 1;
-}
/* If CF is currently stored in memory, writes it to disk. Readers, if any,
retain their current positions. */
cf->mode = READ;
reader = xmalloc (sizeof *reader);
- reader->cf = cf;
reader->next = cf->readers;
if (cf->readers != NULL)
reader->next->prev = reader;
- reader->prev = NULL;
cf->readers = reader;
+ reader->prev = NULL;
+ reader->cf = cf;
reader->case_idx = 0;
+ reader->destructive = 0;
reader->fd = -1;
reader->buffer = NULL;
reader->buffer_pos = 0;
}
/* Reads a copy of the next case from READER into C.
- Caller is responsible for destroying C. */
+ Caller is responsible for destroying C.
+ Returns true if successful, false at end of file. */
int
casereader_read (struct casereader *reader, struct ccase *c)
{
}
/* Reads the next case from READER into C and transfers ownership
- to the caller. Caller is responsible for destroying C. */
+ to the caller. Caller is responsible for destroying C.
+ Returns true if successful, false at end of file. */
int
casereader_read_xfer (struct casereader *reader, struct ccase *c)
{
}
}
+/* Reads the next case from READER into C by way of
+   casereader_read_xfer(), transferring ownership of the case to
+   the caller, who is responsible for destroying C.
+   Running into end of file is treated as an assertion failure
+   instead of being reported to the caller.  The read is done
+   outside the assert() expression, so it still takes place when
+   assertions are compiled out. */
+void
+casereader_read_xfer_assert (struct casereader *reader, struct ccase *c)
+{
+  const bool got_case = casereader_read_xfer (reader, c);
+
+  assert (got_case);
+}
+
/* Destroys READER. */
void
casereader_destroy (struct casereader *reader)
return retval;
}
-/* Calls read(), passing FD, BUFFER, and SIZE, repeating as
- necessary to deal with interrupted calls. */
-static int
-full_read (int fd, void *buffer_, size_t size)
-{
- char *buffer = buffer_;
- size_t bytes_read = 0;
-
- while (bytes_read < size)
- {
- int retval = read (fd, buffer + bytes_read, size - bytes_read);
- if (retval > 0)
- bytes_read += retval;
- else if (retval == 0)
- return bytes_read;
- else if (errno != EINTR)
- return -1;
- }
-
- return bytes_read;
-}
-
-/* Calls write(), passing FD, BUFFER, and SIZE, repeating as
- necessary to deal with interrupted calls. */
-static int
-full_write (int fd, const void *buffer_, size_t size)
-{
- const char *buffer = buffer_;
- size_t bytes_written = 0;
-
- while (bytes_written < size)
- {
- int retval = write (fd, buffer + bytes_written, size - bytes_written);
- if (retval >= 0)
- bytes_written += retval;
- else if (errno != EINTR)
- return -1;
- }
-
- return bytes_written;
-}
-
/* Registers our exit handler with atexit() if it has not already
been registered. */
static void
}
}
+
+
/* atexit() handler that closes and deletes our temporary
files. */
static void
if (token != '.')
return lex_end_of_command ();
- for (pattern = 0; pattern < 5; pattern++)
+ for (pattern = 0; pattern < 6; pattern++)
{
const size_t *size;
static void
test_casefile (int pattern, size_t value_cnt, size_t case_cnt)
{
- int zero = 0;
struct casefile *cf;
struct casereader *r1, *r2;
struct ccase c;
rng = gsl_rng_alloc (gsl_rng_mt19937);
cf = casefile_create (value_cnt);
+ if (pattern == 5)
+ casefile_to_disk (cf);
for (i = 0; i < case_cnt; i++)
write_random_case (cf, i);
+ if (pattern == 5)
+ casefile_sleep (cf);
r1 = casefile_get_reader (cf);
r2 = casefile_get_reader (cf);
switch (pattern)
{
case 0:
+ case 5:
for (i = 0; i < case_cnt; i++)
{
read_and_verify_random_case (cf, r1, i);