X-Git-Url: https://pintos-os.org/cgi-bin/gitweb.cgi?a=blobdiff_plain;f=src%2Fcasefile.c;h=9c3da66023384390cf3018d61a202abee95d851c;hb=1f8dd363d6c20d07fcca14cb948018465fa5ed8b;hp=2ff3a5742f3492ff65c719c38ebb6ebe03c96219;hpb=06f9ee45954e5e71fa7f6262dbf37defa1dbf996;p=pspp-builds.git diff --git a/src/casefile.c b/src/casefile.c index 2ff3a574..9c3da660 100644 --- a/src/casefile.c +++ b/src/casefile.c @@ -14,8 +14,8 @@ You should have received a copy of the GNU General Public License along with this program; if not, write to the Free Software - Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA - 02111-1307, USA. */ + Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA + 02110-1301, USA. */ #include #include "casefile.h" @@ -29,22 +29,87 @@ #include "alloc.h" #include "case.h" #include "error.h" +#include "full-read.h" +#include "full-write.h" #include "misc.h" +#include "mkfile.h" #include "settings.h" #include "var.h" -#ifdef HAVE_VALGRIND_VALGRIND_H -#include -#endif +#include "gettext.h" +#define _(msgid) gettext (msgid) -#define IO_BUF_SIZE 8192 +#define IO_BUF_SIZE (8192 / sizeof (union value)) -/* A casefile is a sequentially accessible array of immutable - cases. It may be stored in memory or on disk as workspace - allows. Cases may be appended to the end of the file. Cases - may be read sequentially starting from the beginning of the - file. Once any cases have been read, no more cases may be - appended. The entire file is discarded at once. */ +/* A casefile represents a sequentially accessible stream of + immutable cases. + + If workspace allows, a casefile is maintained in memory. If + workspace overflows, then the casefile is pushed to disk. In + either case the interface presented to callers is kept the + same. + + The life cycle of a casefile consists of up to three phases: + + 1. Writing. The casefile initially contains no cases. In + this phase, any number of cases may be appended to the + end of a casefile. (Cases are never inserted in the + middle or before the beginning of a casefile.) + + Use casefile_append() or casefile_append_xfer() to + append a case to a casefile. + + 2. Reading. The casefile may be read sequentially, + starting from the beginning, by "casereaders". Any + number of casereaders may be created, at any time, + during the reading phase. Each casereader has an + independent position in the casefile. + + Casereaders may only move forward. They cannot move + backward to arbitrary records or seek randomly. + Cloning casereaders is possible, but it is not yet + implemented. + + Use casefile_get_reader() to create a casereader for + use in phase 2. This also transitions from phase 1 to + phase 2. Calling casefile_mode_reader() makes the same + transition, without creating a casereader. + + Use casereader_read(), casereader_read_xfer(), or + casereader_read_xfer_assert() to read a case from a + casereader. Use casereader_destroy() to discard a + casereader when it is no longer needed. + + 3. Destruction. This phase is optional. The casefile is + also read with casereaders in this phase, but the + ability to create new casereaders is curtailed. + + In this phase, casereaders could still be cloned (once + we eventually implement cloning). + + To transition from phase 1 or 2 to phase 3 and create a + casereader, call casefile_get_destructive_reader(). + The same functions apply to the casereader obtained + this way as apply to casereaders obtained in phase 2. + + After casefile_get_destructive_reader() is called, no + more casereaders may be created with + casefile_get_reader() or + casefile_get_destructive_reader(). (If cloning of + casereaders were implemented, it would still be + possible.) + + The purpose of the limitations applied to casereaders + in phase 3 is to allow in-memory casefiles to fully + transfer ownership of cases to the casereaders, + avoiding the need for extra copies of case data. For + relatively static data sets with many variables, I + suspect (without evidence) that this may be a big + performance boost. + + When a casefile is no longer needed, it may be destroyed with + casefile_destroy(). This function will also destroy any + remaining casereaders. */ /* In-memory cases are arranged in an array of arrays. The top level is variable size and the size of each bottom level array @@ -57,7 +122,6 @@ struct casefile /* Basic data. */ struct casefile *next, *prev; /* Next, prev in global list. */ size_t value_cnt; /* Case size in `union value's. */ - size_t case_size; /* Case size in bytes. */ size_t case_acct_size; /* Case size for accounting. */ unsigned long case_cnt; /* Number of cases stored. */ enum { MEMORY, DISK } storage; /* Where cases are stored. */ @@ -71,9 +135,9 @@ struct casefile /* Disk storage. */ int fd; /* File descriptor, -1 if none. */ char *filename; /* Filename. */ - unsigned char *buffer; /* I/O buffer, NULL if none. */ - size_t buffer_used; /* Number of bytes used in buffer. */ - size_t buffer_size; /* Buffer size in bytes. */ + union value *buffer; /* I/O buffer, NULL if none. */ + size_t buffer_used; /* Number of values used in buffer. */ + size_t buffer_size; /* Buffer size in values. */ }; /* For reading out the cases in a casefile. */ @@ -86,11 +150,18 @@ struct casereader /* Disk storage. */ int fd; /* File descriptor. */ - unsigned char *buffer; /* I/O buffer. */ - size_t buffer_pos; /* Byte offset of buffer position. */ + union value *buffer; /* I/O buffer. */ + size_t buffer_pos; /* Offset of buffer position. */ struct ccase c; /* Current case. */ }; +/* Return the case number of the current case */ +unsigned long +casereader_cnum(const struct casereader *r) +{ + return r->case_idx; +} + /* Doubly linked list of all casefiles. */ static struct casefile *casefiles; @@ -107,8 +178,6 @@ static void fill_buffer (struct casereader *reader); static int safe_open (const char *filename, int flags); static int safe_close (int fd); -static int full_read (int fd, char *buffer, size_t size); -static int full_write (int fd, const char *buffer, size_t size); /* Creates and returns a casefile to store cases of VALUE_CNT `union value's each. */ @@ -122,8 +191,7 @@ casefile_create (size_t value_cnt) cf->next->prev = cf; casefiles = cf; cf->value_cnt = value_cnt; - cf->case_size = case_serial_size (value_cnt); - cf->case_acct_size = cf->case_size + 4 * sizeof (void *); + cf->case_acct_size = (cf->value_cnt + 4) * sizeof *cf->buffer; cf->case_cnt = 0; cf->storage = MEMORY; cf->mode = WRITE; @@ -133,9 +201,9 @@ casefile_create (size_t value_cnt) cf->fd = -1; cf->filename = NULL; cf->buffer = NULL; - cf->buffer_size = ROUND_UP (cf->case_size, IO_BUF_SIZE); - if (cf->case_size > 0 && cf->buffer_size % cf->case_size > 512) - cf->buffer_size = cf->case_size; + cf->buffer_size = ROUND_UP (cf->value_cnt, IO_BUF_SIZE); + if (cf->value_cnt > 0 && cf->buffer_size % cf->value_cnt > 64) + cf->buffer_size = cf->value_cnt; cf->buffer_used = 0; register_atexit (); return cf; @@ -219,6 +287,7 @@ casefile_sleep (const struct casefile *cf_) casefile_mode_reader (cf); casefile_to_disk (cf); + flush_buffer (cf); if (cf->fd != -1) { @@ -312,9 +381,9 @@ casefile_append_xfer (struct casefile *cf, struct ccase *c) static void write_case_to_disk (struct casefile *cf, const struct ccase *c) { - case_serialize (c, cf->buffer + cf->buffer_used, cf->case_size); - cf->buffer_used += cf->case_size; - if (cf->buffer_used + cf->case_size > cf->buffer_size) + case_to_values (c, cf->buffer + cf->buffer_used, cf->value_cnt); + cf->buffer_used += cf->value_cnt; + if (cf->buffer_used + cf->value_cnt > cf->buffer_size) flush_buffer (cf); } @@ -325,42 +394,14 @@ flush_buffer (struct casefile *cf) { if (cf->buffer_used > 0) { - if (!full_write (cf->fd, cf->buffer, cf->buffer_size)) + if (!full_write (cf->fd, cf->buffer, + cf->buffer_size * sizeof *cf->buffer)) msg (FE, _("Error writing temporary file: %s."), strerror (errno)); cf->buffer_used = 0; } } -/* Creates a temporary file and stores its name in *FILENAME and - a file descriptor for it in *FD. Returns success. Caller is - responsible for freeing *FILENAME. */ -static int -make_temp_file (int *fd, char **filename) -{ - const char *parent_dir; - - assert (filename != NULL); - assert (fd != NULL); - - if (getenv ("TMPDIR") != NULL) - parent_dir = getenv ("TMPDIR"); - else - parent_dir = P_tmpdir; - - *filename = xmalloc (strlen (parent_dir) + 32); - sprintf (*filename, "%s%cpsppXXXXXX", parent_dir, DIR_SEPARATOR); - *fd = mkstemp (*filename); - if (*fd < 0) - { - msg (FE, _("%s: Creating temporary file: %s."), - *filename, strerror (errno)); - free (*filename); - *filename = NULL; - return 0; - } - return 1; -} /* If CF is currently stored in memory, writes it to disk. Readers, if any, retain their current positions. */ @@ -371,7 +412,7 @@ casefile_to_disk (const struct casefile *cf_) struct casereader *reader; assert (cf != NULL); - + if (cf->storage == MEMORY) { size_t idx, block_cnt; @@ -383,8 +424,8 @@ casefile_to_disk (const struct casefile *cf_) cf->storage = DISK; if (!make_temp_file (&cf->fd, &cf->filename)) err_failure (); - cf->buffer = xmalloc (cf->buffer_size); - memset (cf->buffer, 0, cf->buffer_size); + cf->buffer = xmalloc (cf->buffer_size * sizeof *cf->buffer); + memset (cf->buffer, 0, cf->buffer_size * sizeof *cf->buffer); case_bytes -= cf->case_cnt * cf->case_acct_size; for (idx = 0; idx < cf->case_cnt; idx++) @@ -438,13 +479,14 @@ casefile_get_reader (const struct casefile *cf_) cf->mode = READ; reader = xmalloc (sizeof *reader); - reader->cf = cf; reader->next = cf->readers; if (cf->readers != NULL) reader->next->prev = reader; - reader->prev = NULL; cf->readers = reader; + reader->prev = NULL; + reader->cf = cf; reader->case_idx = 0; + reader->destructive = 0; reader->fd = -1; reader->buffer = NULL; reader->buffer_pos = 0; @@ -482,7 +524,6 @@ static void reader_open_file (struct casereader *reader) { struct casefile *cf = reader->cf; - size_t buffer_case_cnt; off_t file_ofs; if (reader->case_idx >= cf->case_cnt) @@ -508,17 +549,17 @@ reader_open_file (struct casereader *reader) } else { - reader->buffer = xmalloc (cf->buffer_size); - memset (reader->buffer, 0, cf->buffer_size); + reader->buffer = xmalloc (cf->buffer_size * sizeof *cf->buffer); + memset (reader->buffer, 0, cf->buffer_size * sizeof *cf->buffer); } - if (cf->case_size != 0) + if (cf->value_cnt != 0) { - buffer_case_cnt = cf->buffer_size / cf->case_size; - file_ofs = ((off_t) reader->case_idx - / buffer_case_cnt * cf->buffer_size); + size_t buffer_case_cnt = cf->buffer_size / cf->value_cnt; + file_ofs = ((off_t) reader->case_idx / buffer_case_cnt + * cf->buffer_size * sizeof *cf->buffer); reader->buffer_pos = (reader->case_idx % buffer_case_cnt - * cf->case_size); + * cf->value_cnt); } else file_ofs = 0; @@ -526,7 +567,7 @@ reader_open_file (struct casereader *reader) msg (FE, _("%s: Seeking temporary file: %s."), cf->filename, strerror (errno)); - if (cf->case_cnt > 0 && cf->case_size > 0) + if (cf->case_cnt > 0 && cf->value_cnt > 0) fill_buffer (reader); case_create (&reader->c, cf->value_cnt); @@ -536,11 +577,12 @@ reader_open_file (struct casereader *reader) static void fill_buffer (struct casereader *reader) { - int retval = full_read (reader->fd, reader->buffer, reader->cf->buffer_size); + int retval = full_read (reader->fd, reader->buffer, + reader->cf->buffer_size * sizeof *reader->buffer); if (retval < 0) msg (FE, _("%s: Reading temporary file: %s."), reader->cf->filename, strerror (errno)); - else if (retval != reader->cf->buffer_size) + else if (retval != reader->cf->buffer_size * sizeof *reader->buffer) msg (FE, _("%s: Temporary file ended unexpectedly."), reader->cf->filename); } @@ -555,7 +597,8 @@ casereader_get_casefile (const struct casereader *reader) } /* Reads a copy of the next case from READER into C. - Caller is responsible for destroying C. */ + Caller is responsible for destroying C. + Returns true if successful, false at end of file. */ int casereader_read (struct casereader *reader, struct ccase *c) { @@ -575,15 +618,15 @@ casereader_read (struct casereader *reader, struct ccase *c) } else { - if (reader->buffer_pos + reader->cf->case_size > reader->cf->buffer_size) + if (reader->buffer_pos + reader->cf->value_cnt > reader->cf->buffer_size) { fill_buffer (reader); reader->buffer_pos = 0; } - case_unserialize (&reader->c, reader->buffer + reader->buffer_pos, - reader->cf->case_size); - reader->buffer_pos += reader->cf->case_size; + case_from_values (&reader->c, reader->buffer + reader->buffer_pos, + reader->cf->value_cnt); + reader->buffer_pos += reader->cf->value_cnt; reader->case_idx++; case_clone (c, &reader->c); @@ -592,7 +635,8 @@ casereader_read (struct casereader *reader, struct ccase *c) } /* Reads the next case from READER into C and transfers ownership - to the caller. Caller is responsible for destroying C. */ + to the caller. Caller is responsible for destroying C. + Returns true if successful, false at end of file. */ int casereader_read_xfer (struct casereader *reader, struct ccase *c) { @@ -614,6 +658,16 @@ casereader_read_xfer (struct casereader *reader, struct ccase *c) } } +/* Reads the next case from READER into C and transfers ownership + to the caller. Caller is responsible for destroying C. + Assert-fails at end of file. */ +void +casereader_read_xfer_assert (struct casereader *reader, struct ccase *c) +{ + bool success = casereader_read_xfer (reader, c); + assert (success); +} + /* Destroys READER. */ void casereader_destroy (struct casereader *reader) @@ -676,46 +730,6 @@ static int safe_close (int fd) return retval; } -/* Calls read(), passing FD, BUFFER, and SIZE, repeating as - necessary to deal with interrupted calls. */ -static int -full_read (int fd, char *buffer, size_t size) -{ - size_t bytes_read = 0; - - while (bytes_read < size) - { - int retval = read (fd, buffer + bytes_read, size - bytes_read); - if (retval > 0) - bytes_read += retval; - else if (retval == 0) - return bytes_read; - else if (errno != EINTR) - return -1; - } - - return bytes_read; -} - -/* Calls write(), passing FD, BUFFER, and SIZE, repeating as - necessary to deal with interrupted calls. */ -static int -full_write (int fd, const char *buffer, size_t size) -{ - size_t bytes_written = 0; - - while (bytes_written < size) - { - int retval = write (fd, buffer + bytes_written, size - bytes_written); - if (retval >= 0) - bytes_written += retval; - else if (errno != EINTR) - return -1; - } - - return bytes_written; -} - /* Registers our exit handler with atexit() if it has not already been registered. */ static void @@ -729,6 +743,8 @@ register_atexit (void) } } + + /* atexit() handler that closes and deletes our temporary files. */ static void @@ -738,9 +754,9 @@ exit_handler (void) casefile_destroy (casefiles); } +#include #include #include "command.h" -#include "random.h" #include "lexer.h" static void test_casefile (int pattern, size_t value_cnt, size_t case_cnt); @@ -775,7 +791,7 @@ cmd_debug_casefile (void) if (token != '.') return lex_end_of_command (); - for (pattern = 0; pattern < 5; pattern++) + for (pattern = 0; pattern < 6; pattern++) { const size_t *size; @@ -795,23 +811,26 @@ cmd_debug_casefile (void) static void test_casefile (int pattern, size_t value_cnt, size_t case_cnt) { - int zero = 0; struct casefile *cf; struct casereader *r1, *r2; struct ccase c; - struct rng *rng; + gsl_rng *rng; size_t i, j; - rng = rng_create (); - rng_seed (rng, &zero, sizeof zero); + rng = gsl_rng_alloc (gsl_rng_mt19937); cf = casefile_create (value_cnt); + if (pattern == 5) + casefile_to_disk (cf); for (i = 0; i < case_cnt; i++) write_random_case (cf, i); + if (pattern == 5) + casefile_sleep (cf); r1 = casefile_get_reader (cf); r2 = casefile_get_reader (cf); switch (pattern) { case 0: + case 5: for (i = 0; i < case_cnt; i++) { read_and_verify_random_case (cf, r1, i); @@ -830,8 +849,8 @@ test_casefile (int pattern, size_t value_cnt, size_t case_cnt) for (i = j = 0; i < case_cnt; i++) { read_and_verify_random_case (cf, r1, i); - if (rng_get_int (rng) % pattern == 0) - read_and_verify_random_case (cf, r2, j++); + if (gsl_rng_get (rng) % pattern == 0) + read_and_verify_random_case (cf, r2, j++); if (i == case_cnt / 2) casefile_to_disk (cf); } @@ -870,7 +889,7 @@ test_casefile (int pattern, size_t value_cnt, size_t case_cnt) casereader_destroy (r1); } casefile_destroy (cf); - rng_destroy (rng); + gsl_rng_free (rng); } static void