X-Git-Url: https://pintos-os.org/cgi-bin/gitweb.cgi?a=blobdiff_plain;f=src%2Fcasefile.c;h=8fe0740371aee2eab107119a46789e99ede08a64;hb=53ceff2865473a6b561b521986fafd31a993a1a6;hp=eca07f83e2610fcdb499372e661209dbb764e67b;hpb=cb962ee9edc95f73507d35f0714ec8aa68c5295c;p=pspp diff --git a/src/casefile.c b/src/casefile.c index eca07f83e2..8fe0740371 100644 --- a/src/casefile.c +++ b/src/casefile.c @@ -14,8 +14,8 @@ You should have received a copy of the GNU General Public License along with this program; if not, write to the Free Software - Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA - 02111-1307, USA. */ + Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA + 02110-1301, USA. */ #include #include "casefile.h" @@ -29,22 +29,87 @@ #include "alloc.h" #include "case.h" #include "error.h" +#include "full-read.h" +#include "full-write.h" #include "misc.h" +#include "mkfile.h" #include "settings.h" #include "var.h" -#ifdef HAVE_VALGRIND_VALGRIND_H -#include -#endif +#include "gettext.h" +#define _(msgid) gettext (msgid) #define IO_BUF_SIZE (8192 / sizeof (union value)) -/* A casefile is a sequentially accessible array of immutable - cases. It may be stored in memory or on disk as workspace - allows. Cases may be appended to the end of the file. Cases - may be read sequentially starting from the beginning of the - file. Once any cases have been read, no more cases may be - appended. The entire file is discarded at once. */ +/* A casefile represents a sequentially accessible stream of + immutable cases. + + If workspace allows, a casefile is maintained in memory. If + workspace overflows, then the casefile is pushed to disk. In + either case the interface presented to callers is kept the + same. + + The life cycle of a casefile consists of up to three phases: + + 1. Writing. The casefile initially contains no cases. In + this phase, any number of cases may be appended to the + end of a casefile. (Cases are never inserted in the + middle or before the beginning of a casefile.) + + Use casefile_append() or casefile_append_xfer() to + append a case to a casefile. + + 2. Reading. The casefile may be read sequentially, + starting from the beginning, by "casereaders". Any + number of casereaders may be created, at any time, + during the reading phase. Each casereader has an + independent position in the casefile. + + Casereaders may only move forward. They cannot move + backward to arbitrary records or seek randomly. + Cloning casereaders is possible, but it is not yet + implemented. + + Use casefile_get_reader() to create a casereader for + use in phase 2. This also transitions from phase 1 to + phase 2. Calling casefile_mode_reader() makes the same + transition, without creating a casereader. + + Use casereader_read(), casereader_read_xfer(), or + casereader_read_xfer_assert() to read a case from a + casereader. Use casereader_destroy() to discard a + casereader when it is no longer needed. + + 3. Destruction. This phase is optional. The casefile is + also read with casereaders in this phase, but the + ability to create new casereaders is curtailed. + + In this phase, casereaders could still be cloned (once + we eventually implement cloning). + + To transition from phase 1 or 2 to phase 3 and create a + casereader, call casefile_get_destructive_reader(). + The same functions apply to the casereader obtained + this way as apply to casereaders obtained in phase 2. + + After casefile_get_destructive_reader() is called, no + more casereaders may be created with + casefile_get_reader() or + casefile_get_destructive_reader(). (If cloning of + casereaders were implemented, it would still be + possible.) + + The purpose of the limitations applied to casereaders + in phase 3 is to allow in-memory casefiles to fully + transfer ownership of cases to the casereaders, + avoiding the need for extra copies of case data. For + relatively static data sets with many variables, I + suspect (without evidence) that this may be a big + performance boost. + + When a casefile is no longer needed, it may be destroyed with + casefile_destroy(). This function will also destroy any + remaining casereaders. */ /* In-memory cases are arranged in an array of arrays. The top level is variable size and the size of each bottom level array @@ -113,8 +178,6 @@ static void fill_buffer (struct casereader *reader); static int safe_open (const char *filename, int flags); static int safe_close (int fd); -static int full_read (int fd, void *buffer, size_t size); -static int full_write (int fd, const void *buffer, size_t size); /* Creates and returns a casefile to store cases of VALUE_CNT `union value's each. */ @@ -268,7 +331,7 @@ casefile_append (struct casefile *cf, const struct ccase *c) /* Try memory first. */ if (cf->storage == MEMORY) { - if (case_bytes < get_max_workspace ()) + if (case_bytes < get_workspace ()) { size_t block_idx = cf->case_cnt / CASES_PER_BLOCK; size_t case_idx = cf->case_cnt % CASES_PER_BLOCK; @@ -281,12 +344,12 @@ casefile_append (struct casefile *cf, const struct ccase *c) if ((block_idx & (block_idx - 1)) == 0) { size_t block_cap = block_idx == 0 ? 1 : block_idx * 2; - cf->cases = xrealloc (cf->cases, - sizeof *cf->cases * block_cap); + cf->cases = xnrealloc (cf->cases, + block_cap, sizeof *cf->cases); } - cf->cases[block_idx] = xmalloc (sizeof **cf->cases - * CASES_PER_BLOCK); + cf->cases[block_idx] = xnmalloc (CASES_PER_BLOCK, + sizeof **cf->cases); } case_move (&cf->cases[block_idx][case_idx], &new_case); @@ -339,35 +402,6 @@ flush_buffer (struct casefile *cf) } } -/* Creates a temporary file and stores its name in *FILENAME and - a file descriptor for it in *FD. Returns success. Caller is - responsible for freeing *FILENAME. */ -static int -make_temp_file (int *fd, char **filename) -{ - const char *parent_dir; - - assert (filename != NULL); - assert (fd != NULL); - - if (getenv ("TMPDIR") != NULL) - parent_dir = getenv ("TMPDIR"); - else - parent_dir = P_tmpdir; - - *filename = xmalloc (strlen (parent_dir) + 32); - sprintf (*filename, "%s%cpsppXXXXXX", parent_dir, DIR_SEPARATOR); - *fd = mkstemp (*filename); - if (*fd < 0) - { - msg (FE, _("%s: Creating temporary file: %s."), - *filename, strerror (errno)); - free (*filename); - *filename = NULL; - return 0; - } - return 1; -} /* If CF is currently stored in memory, writes it to disk. Readers, if any, retain their current positions. */ @@ -390,7 +424,7 @@ casefile_to_disk (const struct casefile *cf_) cf->storage = DISK; if (!make_temp_file (&cf->fd, &cf->filename)) err_failure (); - cf->buffer = xmalloc (cf->buffer_size * sizeof *cf->buffer); + cf->buffer = xnmalloc (cf->buffer_size, sizeof *cf->buffer); memset (cf->buffer, 0, cf->buffer_size * sizeof *cf->buffer); case_bytes -= cf->case_cnt * cf->case_acct_size; @@ -515,7 +549,7 @@ reader_open_file (struct casereader *reader) } else { - reader->buffer = xmalloc (cf->buffer_size * sizeof *cf->buffer); + reader->buffer = xnmalloc (cf->buffer_size, sizeof *cf->buffer); memset (reader->buffer, 0, cf->buffer_size * sizeof *cf->buffer); } @@ -563,7 +597,8 @@ casereader_get_casefile (const struct casereader *reader) } /* Reads a copy of the next case from READER into C. - Caller is responsible for destroying C. */ + Caller is responsible for destroying C. + Returns true if successful, false at end of file. */ int casereader_read (struct casereader *reader, struct ccase *c) { @@ -600,7 +635,8 @@ casereader_read (struct casereader *reader, struct ccase *c) } /* Reads the next case from READER into C and transfers ownership - to the caller. Caller is responsible for destroying C. */ + to the caller. Caller is responsible for destroying C. + Returns true if successful, false at end of file. */ int casereader_read_xfer (struct casereader *reader, struct ccase *c) { @@ -622,6 +658,16 @@ casereader_read_xfer (struct casereader *reader, struct ccase *c) } } +/* Reads the next case from READER into C and transfers ownership + to the caller. Caller is responsible for destroying C. + Assert-fails at end of file. */ +void +casereader_read_xfer_assert (struct casereader *reader, struct ccase *c) +{ + bool success = casereader_read_xfer (reader, c); + assert (success); +} + /* Destroys READER. */ void casereader_destroy (struct casereader *reader) @@ -684,48 +730,6 @@ static int safe_close (int fd) return retval; } -/* Calls read(), passing FD, BUFFER, and SIZE, repeating as - necessary to deal with interrupted calls. */ -static int -full_read (int fd, void *buffer_, size_t size) -{ - char *buffer = buffer_; - size_t bytes_read = 0; - - while (bytes_read < size) - { - int retval = read (fd, buffer + bytes_read, size - bytes_read); - if (retval > 0) - bytes_read += retval; - else if (retval == 0) - return bytes_read; - else if (errno != EINTR) - return -1; - } - - return bytes_read; -} - -/* Calls write(), passing FD, BUFFER, and SIZE, repeating as - necessary to deal with interrupted calls. */ -static int -full_write (int fd, const void *buffer_, size_t size) -{ - const char *buffer = buffer_; - size_t bytes_written = 0; - - while (bytes_written < size) - { - int retval = write (fd, buffer + bytes_written, size - bytes_written); - if (retval >= 0) - bytes_written += retval; - else if (errno != EINTR) - return -1; - } - - return bytes_written; -} - /* Registers our exit handler with atexit() if it has not already been registered. */ static void @@ -739,6 +743,8 @@ register_atexit (void) } } + + /* atexit() handler that closes and deletes our temporary files. */ static void @@ -747,194 +753,3 @@ exit_handler (void) while (casefiles != NULL) casefile_destroy (casefiles); } - -#include -#include -#include "command.h" -#include "lexer.h" - -static void test_casefile (int pattern, size_t value_cnt, size_t case_cnt); -static void get_random_case (struct ccase *, size_t value_cnt, - size_t case_idx); -static void write_random_case (struct casefile *cf, size_t case_idx); -static void read_and_verify_random_case (struct casefile *cf, - struct casereader *reader, - size_t case_idx); -static void fail_test (const char *message, ...); - -int -cmd_debug_casefile (void) -{ - static const size_t sizes[] = - { - 1, 2, 3, 4, 5, 6, 7, 14, 15, 16, 17, 31, 55, 73, - 100, 137, 257, 521, 1031, 2053 - }; - int size_max; - int case_max; - int pattern; - - size_max = sizeof sizes / sizeof *sizes; - if (lex_match_id ("SMALL")) - { - size_max -= 4; - case_max = 511; - } - else - case_max = 4095; - if (token != '.') - return lex_end_of_command (); - - for (pattern = 0; pattern < 6; pattern++) - { - const size_t *size; - - for (size = sizes; size < sizes + size_max; size++) - { - size_t case_cnt; - - for (case_cnt = 0; case_cnt <= case_max; - case_cnt = (case_cnt * 2) + 1) - test_casefile (pattern, *size, case_cnt); - } - } - printf ("Casefile tests succeeded.\n"); - return CMD_SUCCESS; -} - -static void -test_casefile (int pattern, size_t value_cnt, size_t case_cnt) -{ - struct casefile *cf; - struct casereader *r1, *r2; - struct ccase c; - gsl_rng *rng; - size_t i, j; - - rng = gsl_rng_alloc (gsl_rng_mt19937); - cf = casefile_create (value_cnt); - if (pattern == 5) - casefile_to_disk (cf); - for (i = 0; i < case_cnt; i++) - write_random_case (cf, i); - if (pattern == 5) - casefile_sleep (cf); - r1 = casefile_get_reader (cf); - r2 = casefile_get_reader (cf); - switch (pattern) - { - case 0: - case 5: - for (i = 0; i < case_cnt; i++) - { - read_and_verify_random_case (cf, r1, i); - read_and_verify_random_case (cf, r2, i); - } - break; - case 1: - for (i = 0; i < case_cnt; i++) - read_and_verify_random_case (cf, r1, i); - for (i = 0; i < case_cnt; i++) - read_and_verify_random_case (cf, r2, i); - break; - case 2: - case 3: - case 4: - for (i = j = 0; i < case_cnt; i++) - { - read_and_verify_random_case (cf, r1, i); - if (gsl_rng_get (rng) % pattern == 0) - read_and_verify_random_case (cf, r2, j++); - if (i == case_cnt / 2) - casefile_to_disk (cf); - } - for (; j < case_cnt; j++) - read_and_verify_random_case (cf, r2, j); - break; - } - if (casereader_read (r1, &c)) - fail_test ("Casereader 1 not at end of file."); - if (casereader_read (r2, &c)) - fail_test ("Casereader 2 not at end of file."); - if (pattern != 1) - casereader_destroy (r1); - if (pattern != 2) - casereader_destroy (r2); - if (pattern > 2) - { - r1 = casefile_get_destructive_reader (cf); - for (i = 0; i < case_cnt; i++) - { - struct ccase read_case, expected_case; - - get_random_case (&expected_case, value_cnt, i); - if (!casereader_read_xfer (r1, &read_case)) - fail_test ("Premature end of casefile."); - for (j = 0; j < value_cnt; j++) - { - double a = case_num (&read_case, j); - double b = case_num (&expected_case, j); - if (a != b) - fail_test ("Case %lu fails comparison.", (unsigned long) i); - } - case_destroy (&expected_case); - case_destroy (&read_case); - } - casereader_destroy (r1); - } - casefile_destroy (cf); - gsl_rng_free (rng); -} - -static void -get_random_case (struct ccase *c, size_t value_cnt, size_t case_idx) -{ - int i; - case_create (c, value_cnt); - for (i = 0; i < value_cnt; i++) - case_data_rw (c, i)->f = case_idx % 257 + i; -} - -static void -write_random_case (struct casefile *cf, size_t case_idx) -{ - struct ccase c; - get_random_case (&c, casefile_get_value_cnt (cf), case_idx); - casefile_append_xfer (cf, &c); -} - -static void -read_and_verify_random_case (struct casefile *cf, - struct casereader *reader, size_t case_idx) -{ - struct ccase read_case, expected_case; - size_t value_cnt; - size_t i; - - value_cnt = casefile_get_value_cnt (cf); - get_random_case (&expected_case, value_cnt, case_idx); - if (!casereader_read (reader, &read_case)) - fail_test ("Premature end of casefile."); - for (i = 0; i < value_cnt; i++) - { - double a = case_num (&read_case, i); - double b = case_num (&expected_case, i); - if (a != b) - fail_test ("Case %lu fails comparison.", (unsigned long) case_idx); - } - case_destroy (&read_case); - case_destroy (&expected_case); -} - -static void -fail_test (const char *message, ...) -{ - va_list args; - - va_start (args, message); - vprintf (message, args); - putchar ('\n'); - va_end (args); - - exit (1); -}