You should have received a copy of the GNU General Public License
along with this program; if not, write to the Free Software
- Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA
- 02111-1307, USA. */
+ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
+ 02110-1301, USA. */
#include <config.h>
#include "casefile.h"
#include "alloc.h"
#include "case.h"
#include "error.h"
+#include "full-read.h"
+#include "full-write.h"
#include "misc.h"
+#include "mkfile.h"
#include "settings.h"
#include "var.h"
-#ifdef HAVE_VALGRIND_VALGRIND_H
-#include <valgrind/valgrind.h>
-#endif
+#include "gettext.h"
+#define _(msgid) gettext (msgid)
#define IO_BUF_SIZE (8192 / sizeof (union value))
-/* A casefile is a sequentially accessible array of immutable
- cases. It may be stored in memory or on disk as workspace
- allows. Cases may be appended to the end of the file. Cases
- may be read sequentially starting from the beginning of the
- file. Once any cases have been read, no more cases may be
- appended. The entire file is discarded at once. */
+/* A casefile represents a sequentially accessible stream of
+ immutable cases.
+
+ If workspace allows, a casefile is maintained in memory. If
+ workspace overflows, then the casefile is pushed to disk. In
+ either case the interface presented to callers is kept the
+ same.
+
+ The life cycle of a casefile consists of up to three phases:
+
+ 1. Writing. The casefile initially contains no cases. In
+ this phase, any number of cases may be appended to the
+ end of a casefile. (Cases are never inserted in the
+ middle or before the beginning of a casefile.)
+
+ Use casefile_append() or casefile_append_xfer() to
+ append a case to a casefile.
+
+ 2. Reading. The casefile may be read sequentially,
+ starting from the beginning, by "casereaders". Any
+ number of casereaders may be created, at any time,
+ during the reading phase. Each casereader has an
+ independent position in the casefile.
+
+ Casereaders may only move forward. They cannot move
+ backward to arbitrary records or seek randomly.
+ Cloning casereaders would be possible, but it is not yet
+ implemented.
+
+ Use casefile_get_reader() to create a casereader for
+ use in phase 2. This also transitions from phase 1 to
+ phase 2. Calling casefile_mode_reader() makes the same
+ transition, without creating a casereader.
+
+ Use casereader_read(), casereader_read_xfer(), or
+ casereader_read_xfer_assert() to read a case from a
+ casereader. Use casereader_destroy() to discard a
+ casereader when it is no longer needed.
+
+ 3. Destruction. This phase is optional. The casefile is
+ also read with casereaders in this phase, but the
+ ability to create new casereaders is curtailed.
+
+ In this phase, casereaders could still be cloned (once
+ we eventually implement cloning).
+
+ To transition from phase 1 or 2 to phase 3 and create a
+ casereader, call casefile_get_destructive_reader().
+ The same functions apply to the casereader obtained
+ this way as apply to casereaders obtained in phase 2.
+
+ After casefile_get_destructive_reader() is called, no
+ more casereaders may be created with
+ casefile_get_reader() or
+ casefile_get_destructive_reader(). (If cloning of
+ casereaders were implemented, it would still be
+ possible.)
+
+ The purpose of the limitations applied to casereaders
+ in phase 3 is to allow in-memory casefiles to fully
+ transfer ownership of cases to the casereaders,
+ avoiding the need for extra copies of case data. For
+ relatively static data sets with many variables, I
+ suspect (without evidence) that this may be a big
+ performance boost.
+
+ When a casefile is no longer needed, it may be destroyed with
+ casefile_destroy(). This function will also destroy any
+ remaining casereaders. */
/* In-memory cases are arranged in an array of arrays. The top
level is variable size and the size of each bottom level array
static int safe_open (const char *filename, int flags);
static int safe_close (int fd);
-static int full_read (int fd, void *buffer, size_t size);
-static int full_write (int fd, const void *buffer, size_t size);
/* Creates and returns a casefile to store cases of VALUE_CNT
`union value's each. */
/* Try memory first. */
if (cf->storage == MEMORY)
{
- if (case_bytes < get_max_workspace ())
+ if (case_bytes < get_workspace ())
{
size_t block_idx = cf->case_cnt / CASES_PER_BLOCK;
size_t case_idx = cf->case_cnt % CASES_PER_BLOCK;
if ((block_idx & (block_idx - 1)) == 0)
{
size_t block_cap = block_idx == 0 ? 1 : block_idx * 2;
- cf->cases = xrealloc (cf->cases,
- sizeof *cf->cases * block_cap);
+ cf->cases = xnrealloc (cf->cases,
+ block_cap, sizeof *cf->cases);
}
- cf->cases[block_idx] = xmalloc (sizeof **cf->cases
- * CASES_PER_BLOCK);
+ cf->cases[block_idx] = xnmalloc (CASES_PER_BLOCK,
+ sizeof **cf->cases);
}
case_move (&cf->cases[block_idx][case_idx], &new_case);
cf->storage = DISK;
if (!make_temp_file (&cf->fd, &cf->filename))
err_failure ();
- cf->buffer = xmalloc (cf->buffer_size * sizeof *cf->buffer);
+ cf->buffer = xnmalloc (cf->buffer_size, sizeof *cf->buffer);
memset (cf->buffer, 0, cf->buffer_size * sizeof *cf->buffer);
case_bytes -= cf->case_cnt * cf->case_acct_size;
}
else
{
- reader->buffer = xmalloc (cf->buffer_size * sizeof *cf->buffer);
+ reader->buffer = xnmalloc (cf->buffer_size, sizeof *cf->buffer);
memset (reader->buffer, 0, cf->buffer_size * sizeof *cf->buffer);
}
}
/* Reads a copy of the next case from READER into C.
- Caller is responsible for destroying C. */
+ Caller is responsible for destroying C.
+ Returns true if successful, false at end of file. */
int
casereader_read (struct casereader *reader, struct ccase *c)
{
}
/* Reads the next case from READER into C and transfers ownership
- to the caller. Caller is responsible for destroying C. */
+ to the caller. Caller is responsible for destroying C.
+ Returns true if successful, false at end of file. */
int
casereader_read_xfer (struct casereader *reader, struct ccase *c)
{
}
}
+/* Reads the next case from READER into C and transfers ownership
+ to the caller. Caller is responsible for destroying C.
+ Assert-fails at end of file.
+
+ This is a wrapper around casereader_read_xfer() for callers
+ that expect another case to be available. The read is done
+ in its own statement, outside the assert() expression, so it
+ is still performed when assertions are compiled out (NDEBUG);
+ in that configuration a failed read is not reported. */
+void
+casereader_read_xfer_assert (struct casereader *reader, struct ccase *c)
+{
+ bool success = casereader_read_xfer (reader, c);
+ assert (success);
+}
+
/* Destroys READER. */
void
casereader_destroy (struct casereader *reader)
return retval;
}
-/* Calls read(), passing FD, BUFFER, and SIZE, repeating as
- necessary to deal with interrupted calls. */
-static int
-full_read (int fd, void *buffer_, size_t size)
-{
- char *buffer = buffer_;
- size_t bytes_read = 0;
-
- while (bytes_read < size)
- {
- int retval = read (fd, buffer + bytes_read, size - bytes_read);
- if (retval > 0)
- bytes_read += retval;
- else if (retval == 0)
- return bytes_read;
- else if (errno != EINTR)
- return -1;
- }
-
- return bytes_read;
-}
-
-/* Calls write(), passing FD, BUFFER, and SIZE, repeating as
- necessary to deal with interrupted calls. */
-static int
-full_write (int fd, const void *buffer_, size_t size)
-{
- const char *buffer = buffer_;
- size_t bytes_written = 0;
-
- while (bytes_written < size)
- {
- int retval = write (fd, buffer + bytes_written, size - bytes_written);
- if (retval >= 0)
- bytes_written += retval;
- else if (errno != EINTR)
- return -1;
- }
-
- return bytes_written;
-}
-
-
/* Registers our exit handler with atexit() if it has not already
been registered. */
static void
while (casefiles != NULL)
casefile_destroy (casefiles);
}
-\f
-#include <gsl/gsl_rng.h>
-#include <stdarg.h>
-#include "command.h"
-#include "lexer.h"
-
-static void test_casefile (int pattern, size_t value_cnt, size_t case_cnt);
-static void get_random_case (struct ccase *, size_t value_cnt,
- size_t case_idx);
-static void write_random_case (struct casefile *cf, size_t case_idx);
-static void read_and_verify_random_case (struct casefile *cf,
- struct casereader *reader,
- size_t case_idx);
-static void fail_test (const char *message, ...);
-
-int
-cmd_debug_casefile (void)
-{
- static const size_t sizes[] =
- {
- 1, 2, 3, 4, 5, 6, 7, 14, 15, 16, 17, 31, 55, 73,
- 100, 137, 257, 521, 1031, 2053
- };
- int size_max;
- int case_max;
- int pattern;
-
- size_max = sizeof sizes / sizeof *sizes;
- if (lex_match_id ("SMALL"))
- {
- size_max -= 4;
- case_max = 511;
- }
- else
- case_max = 4095;
- if (token != '.')
- return lex_end_of_command ();
-
- for (pattern = 0; pattern < 6; pattern++)
- {
- const size_t *size;
-
- for (size = sizes; size < sizes + size_max; size++)
- {
- size_t case_cnt;
-
- for (case_cnt = 0; case_cnt <= case_max;
- case_cnt = (case_cnt * 2) + 1)
- test_casefile (pattern, *size, case_cnt);
- }
- }
- printf ("Casefile tests succeeded.\n");
- return CMD_SUCCESS;
-}
-
-static void
-test_casefile (int pattern, size_t value_cnt, size_t case_cnt)
-{
- struct casefile *cf;
- struct casereader *r1, *r2;
- struct ccase c;
- gsl_rng *rng;
- size_t i, j;
-
- rng = gsl_rng_alloc (gsl_rng_mt19937);
- cf = casefile_create (value_cnt);
- if (pattern == 5)
- casefile_to_disk (cf);
- for (i = 0; i < case_cnt; i++)
- write_random_case (cf, i);
- if (pattern == 5)
- casefile_sleep (cf);
- r1 = casefile_get_reader (cf);
- r2 = casefile_get_reader (cf);
- switch (pattern)
- {
- case 0:
- case 5:
- for (i = 0; i < case_cnt; i++)
- {
- read_and_verify_random_case (cf, r1, i);
- read_and_verify_random_case (cf, r2, i);
- }
- break;
- case 1:
- for (i = 0; i < case_cnt; i++)
- read_and_verify_random_case (cf, r1, i);
- for (i = 0; i < case_cnt; i++)
- read_and_verify_random_case (cf, r2, i);
- break;
- case 2:
- case 3:
- case 4:
- for (i = j = 0; i < case_cnt; i++)
- {
- read_and_verify_random_case (cf, r1, i);
- if (gsl_rng_get (rng) % pattern == 0)
- read_and_verify_random_case (cf, r2, j++);
- if (i == case_cnt / 2)
- casefile_to_disk (cf);
- }
- for (; j < case_cnt; j++)
- read_and_verify_random_case (cf, r2, j);
- break;
- }
- if (casereader_read (r1, &c))
- fail_test ("Casereader 1 not at end of file.");
- if (casereader_read (r2, &c))
- fail_test ("Casereader 2 not at end of file.");
- if (pattern != 1)
- casereader_destroy (r1);
- if (pattern != 2)
- casereader_destroy (r2);
- if (pattern > 2)
- {
- r1 = casefile_get_destructive_reader (cf);
- for (i = 0; i < case_cnt; i++)
- {
- struct ccase read_case, expected_case;
-
- get_random_case (&expected_case, value_cnt, i);
- if (!casereader_read_xfer (r1, &read_case))
- fail_test ("Premature end of casefile.");
- for (j = 0; j < value_cnt; j++)
- {
- double a = case_num (&read_case, j);
- double b = case_num (&expected_case, j);
- if (a != b)
- fail_test ("Case %lu fails comparison.", (unsigned long) i);
- }
- case_destroy (&expected_case);
- case_destroy (&read_case);
- }
- casereader_destroy (r1);
- }
- casefile_destroy (cf);
- gsl_rng_free (rng);
-}
-
-static void
-get_random_case (struct ccase *c, size_t value_cnt, size_t case_idx)
-{
- int i;
- case_create (c, value_cnt);
- for (i = 0; i < value_cnt; i++)
- case_data_rw (c, i)->f = case_idx % 257 + i;
-}
-
-static void
-write_random_case (struct casefile *cf, size_t case_idx)
-{
- struct ccase c;
- get_random_case (&c, casefile_get_value_cnt (cf), case_idx);
- casefile_append_xfer (cf, &c);
-}
-
-static void
-read_and_verify_random_case (struct casefile *cf,
- struct casereader *reader, size_t case_idx)
-{
- struct ccase read_case, expected_case;
- size_t value_cnt;
- size_t i;
-
- value_cnt = casefile_get_value_cnt (cf);
- get_random_case (&expected_case, value_cnt, case_idx);
- if (!casereader_read (reader, &read_case))
- fail_test ("Premature end of casefile.");
- for (i = 0; i < value_cnt; i++)
- {
- double a = case_num (&read_case, i);
- double b = case_num (&expected_case, i);
- if (a != b)
- fail_test ("Case %lu fails comparison.", (unsigned long) case_idx);
- }
- case_destroy (&read_case);
- case_destroy (&expected_case);
-}
-
-static void
-fail_test (const char *message, ...)
-{
- va_list args;
-
- va_start (args, message);
- vprintf (message, args);
- putchar ('\n');
- va_end (args);
-
- exit (1);
-}