1 /* PSPP - computes sample statistics.
2 Copyright (C) 2004 Free Software Foundation, Inc.
3 Written by Ben Pfaff <blp@gnu.org>.
5 This program is free software; you can redistribute it and/or
6 modify it under the terms of the GNU General Public License as
7 published by the Free Software Foundation; either version 2 of the
8 License, or (at your option) any later version.
10 This program is distributed in the hope that it will be useful, but
11 WITHOUT ANY WARRANTY; without even the implied warranty of
12 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
13 General Public License for more details.
15 You should have received a copy of the GNU General Public License
16 along with this program; if not, write to the Free Software
17 Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
37 #define IO_BUF_SIZE (8192 / sizeof (union value))
39 /* A casefile is a sequentially accessible array of immutable
40 cases. It may be stored in memory or on disk as workspace
41 allows. Cases may be appended to the end of the file. Cases
42 may be read sequentially starting from the beginning of the
43 file. Once any cases have been read, no more cases may be
44 appended. The entire file is discarded at once. */
46 /* In-memory cases are arranged in an array of arrays. The top
47 level is variable size and the size of each bottom level array
48 is fixed at the number of cases defined here. */
49 #define CASES_PER_BLOCK 128
55 struct casefile *next, *prev; /* Next, prev in global list. */
56 size_t value_cnt; /* Case size in `union value's. */
57 size_t case_acct_size; /* Case size for accounting. */
58 unsigned long case_cnt; /* Number of cases stored. */
59 enum { MEMORY, DISK } storage; /* Where cases are stored. */
60 enum { WRITE, READ } mode; /* Is writing or reading allowed? */
61 struct casereader *readers; /* List of our readers. */
62 int being_destroyed; /* Does a destructive reader exist? */
65 struct ccase **cases; /* Pointer to array of cases. */
68 int fd; /* File descriptor, -1 if none. */
69 char *filename; /* Filename. */
70 union value *buffer; /* I/O buffer, NULL if none. */
71 size_t buffer_used; /* Number of values used in buffer. */
72 size_t buffer_size; /* Buffer size in values. */
75 /* For reading out the cases in a casefile. */
78 struct casereader *next, *prev; /* Next, prev in casefile's list. */
79 struct casefile *cf; /* Our casefile. */
80 unsigned long case_idx; /* Case number of current case. */
81 int destructive; /* Is this a destructive reader? */
84 int fd; /* File descriptor. */
85 union value *buffer; /* I/O buffer. */
86 size_t buffer_pos; /* Offset of buffer position. */
87 struct ccase c; /* Current case. */
90 /* Return the case number of the current case */
92 casereader_cnum(const struct casereader *r)
97 /* Doubly linked list of all casefiles. */
98 static struct casefile *casefiles;
100 /* Number of bytes of case allocated in in-memory casefiles. */
101 static size_t case_bytes;
103 static void register_atexit (void);
104 static void exit_handler (void);
106 static void reader_open_file (struct casereader *reader);
107 static void write_case_to_disk (struct casefile *cf, const struct ccase *c);
108 static void flush_buffer (struct casefile *cf);
109 static void fill_buffer (struct casereader *reader);
111 static int safe_open (const char *filename, int flags);
112 static int safe_close (int fd);
113 static int full_read (int fd, void *buffer, size_t size);
114 static int full_write (int fd, const void *buffer, size_t size);
116 /* Creates and returns a casefile to store cases of VALUE_CNT
117 `union value's each. */
119 casefile_create (size_t value_cnt)
121 struct casefile *cf = xmalloc (sizeof *cf);
122 cf->next = casefiles;
124 if (cf->next != NULL)
127 cf->value_cnt = value_cnt;
128 cf->case_acct_size = (cf->value_cnt + 4) * sizeof *cf->buffer;
130 cf->storage = MEMORY;
133 cf->being_destroyed = 0;
138 cf->buffer_size = ROUND_UP (cf->value_cnt, IO_BUF_SIZE);
139 if (cf->value_cnt > 0 && cf->buffer_size % cf->value_cnt > 64)
140 cf->buffer_size = cf->value_cnt;
146 /* Destroys casefile CF. */
148 casefile_destroy (struct casefile *cf)
152 if (cf->next != NULL)
153 cf->next->prev = cf->prev;
154 if (cf->prev != NULL)
155 cf->prev->next = cf->next;
157 casefiles = cf->next;
159 while (cf->readers != NULL)
160 casereader_destroy (cf->readers);
162 if (cf->cases != NULL)
164 size_t idx, block_cnt;
166 case_bytes -= cf->case_cnt * cf->case_acct_size;
167 for (idx = 0; idx < cf->case_cnt; idx++)
169 size_t block_idx = idx / CASES_PER_BLOCK;
170 size_t case_idx = idx % CASES_PER_BLOCK;
171 struct ccase *c = &cf->cases[block_idx][case_idx];
175 block_cnt = DIV_RND_UP (cf->case_cnt, CASES_PER_BLOCK);
176 for (idx = 0; idx < block_cnt; idx++)
177 free (cf->cases[idx]);
185 if (cf->filename != NULL && remove (cf->filename) == -1)
186 msg (ME, _("%s: Removing temporary file: %s."),
187 cf->filename, strerror (errno));
196 /* Returns nonzero only if casefile CF is stored in memory (instead of on
199 casefile_in_core (const struct casefile *cf)
203 return cf->storage == MEMORY;
206 /* Puts a casefile to "sleep", that is, minimizes the resources
207 needed for it by closing its file descriptor and freeing its
208 buffer. This is useful if we need so many casefiles that we
209 might not have enough memory and file descriptors to go
212 For simplicity, this implementation always converts the
213 casefile to reader mode. If this turns out to be a problem,
214 with a little extra work we could also support sleeping
217 casefile_sleep (const struct casefile *cf_)
219 struct casefile *cf = (struct casefile *) cf_;
222 casefile_mode_reader (cf);
223 casefile_to_disk (cf);
231 if (cf->buffer != NULL)
238 /* Returns the number of `union value's in a case for CF. */
240 casefile_get_value_cnt (const struct casefile *cf)
244 return cf->value_cnt;
247 /* Returns the number of cases in casefile CF. */
249 casefile_get_case_cnt (const struct casefile *cf)
256 /* Appends a copy of case C to casefile CF. Not valid after any
257 reader for CF has been created. */
259 casefile_append (struct casefile *cf, const struct ccase *c)
263 assert (cf->mode == WRITE);
265 /* Try memory first. */
266 if (cf->storage == MEMORY)
268 if (case_bytes < get_max_workspace ())
270 size_t block_idx = cf->case_cnt / CASES_PER_BLOCK;
271 size_t case_idx = cf->case_cnt % CASES_PER_BLOCK;
272 struct ccase new_case;
274 case_bytes += cf->case_acct_size;
275 case_clone (&new_case, c);
278 if ((block_idx & (block_idx - 1)) == 0)
280 size_t block_cap = block_idx == 0 ? 1 : block_idx * 2;
281 cf->cases = xrealloc (cf->cases,
282 sizeof *cf->cases * block_cap);
285 cf->cases[block_idx] = xmalloc (sizeof **cf->cases
289 case_move (&cf->cases[block_idx][case_idx], &new_case);
293 casefile_to_disk (cf);
294 assert (cf->storage == DISK);
295 write_case_to_disk (cf, c);
299 write_case_to_disk (cf, c);
304 /* Appends case C to casefile CF, which takes over ownership of
305 C. Not valid after any reader for CF has been created. */
307 casefile_append_xfer (struct casefile *cf, struct ccase *c)
309 casefile_append (cf, c);
313 /* Writes case C to casefile CF's disk buffer, first flushing the buffer to
314 disk if it would otherwise overflow. */
316 write_case_to_disk (struct casefile *cf, const struct ccase *c)
318 case_to_values (c, cf->buffer + cf->buffer_used, cf->value_cnt);
319 cf->buffer_used += cf->value_cnt;
320 if (cf->buffer_used + cf->value_cnt > cf->buffer_size)
324 /* If any bytes in CF's output buffer are used, flush them to
327 flush_buffer (struct casefile *cf)
329 if (cf->buffer_used > 0)
331 if (!full_write (cf->fd, cf->buffer,
332 cf->buffer_size * sizeof *cf->buffer))
333 msg (FE, _("Error writing temporary file: %s."), strerror (errno));
340 /* If CF is currently stored in memory, writes it to disk. Readers, if any,
341 retain their current positions. */
343 casefile_to_disk (const struct casefile *cf_)
345 struct casefile *cf = (struct casefile *) cf_;
346 struct casereader *reader;
350 if (cf->storage == MEMORY)
352 size_t idx, block_cnt;
354 assert (cf->filename == NULL);
355 assert (cf->fd == -1);
356 assert (cf->buffer_used == 0);
359 if (!make_temp_file (&cf->fd, &cf->filename))
361 cf->buffer = xmalloc (cf->buffer_size * sizeof *cf->buffer);
362 memset (cf->buffer, 0, cf->buffer_size * sizeof *cf->buffer);
364 case_bytes -= cf->case_cnt * cf->case_acct_size;
365 for (idx = 0; idx < cf->case_cnt; idx++)
367 size_t block_idx = idx / CASES_PER_BLOCK;
368 size_t case_idx = idx % CASES_PER_BLOCK;
369 struct ccase *c = &cf->cases[block_idx][case_idx];
370 write_case_to_disk (cf, c);
374 block_cnt = DIV_RND_UP (cf->case_cnt, CASES_PER_BLOCK);
375 for (idx = 0; idx < block_cnt; idx++)
376 free (cf->cases[idx]);
381 if (cf->mode == READ)
384 for (reader = cf->readers; reader != NULL; reader = reader->next)
385 reader_open_file (reader);
389 /* Changes CF to reader mode, ensuring that no more cases may be
390 added. Creating a casereader for CF has the same effect. */
392 casefile_mode_reader (struct casefile *cf)
398 /* Creates and returns a casereader for CF. A casereader can be used to
399 sequentially read the cases in a casefile. */
401 casefile_get_reader (const struct casefile *cf_)
403 struct casefile *cf = (struct casefile *) cf_;
404 struct casereader *reader;
407 assert (!cf->being_destroyed);
409 /* Flush the buffer to disk if it's not empty. */
410 if (cf->mode == WRITE && cf->storage == DISK)
415 reader = xmalloc (sizeof *reader);
416 reader->next = cf->readers;
417 if (cf->readers != NULL)
418 reader->next->prev = reader;
419 cf->readers = reader;
422 reader->case_idx = 0;
423 reader->destructive = 0;
425 reader->buffer = NULL;
426 reader->buffer_pos = 0;
427 case_nullify (&reader->c);
429 if (reader->cf->storage == DISK)
430 reader_open_file (reader);
435 /* Creates and returns a destructive casereader for CF. Like a
436 normal casereader, a destructive casereader sequentially reads
437 the cases in a casefile. Unlike a normal casereader, a
438 destructive reader cannot operate concurrently with any other
439 reader. (This restriction could be relaxed in a few ways, but
440 it is so far unnecessary for other code.) */
442 casefile_get_destructive_reader (struct casefile *cf)
444 struct casereader *reader;
446 assert (cf->readers == NULL);
447 reader = casefile_get_reader (cf);
448 reader->destructive = 1;
449 cf->being_destroyed = 1;
453 /* Opens a disk file for READER and seeks to the current position as indicated
454 by case_idx. Normally the current position is the beginning of the file,
455 but casefile_to_disk may cause the file to be opened at a different
458 reader_open_file (struct casereader *reader)
460 struct casefile *cf = reader->cf;
463 if (reader->case_idx >= cf->case_cnt)
473 reader->fd = safe_open (cf->filename, O_RDONLY);
475 msg (FE, _("%s: Opening temporary file: %s."),
476 cf->filename, strerror (errno));
479 if (cf->buffer != NULL)
481 reader->buffer = cf->buffer;
486 reader->buffer = xmalloc (cf->buffer_size * sizeof *cf->buffer);
487 memset (reader->buffer, 0, cf->buffer_size * sizeof *cf->buffer);
490 if (cf->value_cnt != 0)
492 size_t buffer_case_cnt = cf->buffer_size / cf->value_cnt;
493 file_ofs = ((off_t) reader->case_idx / buffer_case_cnt
494 * cf->buffer_size * sizeof *cf->buffer);
495 reader->buffer_pos = (reader->case_idx % buffer_case_cnt
500 if (lseek (reader->fd, file_ofs, SEEK_SET) != file_ofs)
501 msg (FE, _("%s: Seeking temporary file: %s."),
502 cf->filename, strerror (errno));
504 if (cf->case_cnt > 0 && cf->value_cnt > 0)
505 fill_buffer (reader);
507 case_create (&reader->c, cf->value_cnt);
510 /* Fills READER's buffer by reading a block from disk. */
512 fill_buffer (struct casereader *reader)
514 int retval = full_read (reader->fd, reader->buffer,
515 reader->cf->buffer_size * sizeof *reader->buffer);
517 msg (FE, _("%s: Reading temporary file: %s."),
518 reader->cf->filename, strerror (errno));
519 else if (retval != reader->cf->buffer_size * sizeof *reader->buffer)
520 msg (FE, _("%s: Temporary file ended unexpectedly."),
521 reader->cf->filename);
524 /* Returns the casefile that READER reads. */
525 const struct casefile *
526 casereader_get_casefile (const struct casereader *reader)
528 assert (reader != NULL);
533 /* Reads a copy of the next case from READER into C.
534 Caller is responsible for destroying C.
535 Returns true if successful, false at end of file. */
537 casereader_read (struct casereader *reader, struct ccase *c)
539 assert (reader != NULL);
541 if (reader->case_idx >= reader->cf->case_cnt)
544 if (reader->cf->storage == MEMORY)
546 size_t block_idx = reader->case_idx / CASES_PER_BLOCK;
547 size_t case_idx = reader->case_idx % CASES_PER_BLOCK;
549 case_clone (c, &reader->cf->cases[block_idx][case_idx]);
555 if (reader->buffer_pos + reader->cf->value_cnt > reader->cf->buffer_size)
557 fill_buffer (reader);
558 reader->buffer_pos = 0;
561 case_from_values (&reader->c, reader->buffer + reader->buffer_pos,
562 reader->cf->value_cnt);
563 reader->buffer_pos += reader->cf->value_cnt;
566 case_clone (c, &reader->c);
571 /* Reads the next case from READER into C and transfers ownership
572 to the caller. Caller is responsible for destroying C.
573 Returns true if successful, false at end of file. */
575 casereader_read_xfer (struct casereader *reader, struct ccase *c)
577 assert (reader != NULL);
579 if (reader->destructive == 0
580 || reader->case_idx >= reader->cf->case_cnt
581 || reader->cf->storage == DISK)
582 return casereader_read (reader, c);
585 size_t block_idx = reader->case_idx / CASES_PER_BLOCK;
586 size_t case_idx = reader->case_idx % CASES_PER_BLOCK;
587 struct ccase *read_case = &reader->cf->cases[block_idx][case_idx];
589 case_move (c, read_case);
595 /* Reads the next case from READER into C and transfers ownership
596 to the caller. Caller is responsible for destroying C.
597 Assert-fails at end of file. */
599 casereader_read_xfer_assert (struct casereader *reader, struct ccase *c)
601 bool success = casereader_read_xfer (reader, c);
605 /* Destroys READER. */
607 casereader_destroy (struct casereader *reader)
609 assert (reader != NULL);
611 if (reader->next != NULL)
612 reader->next->prev = reader->prev;
613 if (reader->prev != NULL)
614 reader->prev->next = reader->next;
615 if (reader->cf->readers == reader)
616 reader->cf->readers = reader->next;
618 if (reader->cf->buffer == NULL)
619 reader->cf->buffer = reader->buffer;
621 free (reader->buffer);
623 if (reader->fd != -1)
625 if (reader->cf->fd == -1)
626 reader->cf->fd = reader->fd;
628 safe_close (reader->fd);
631 case_destroy (&reader->c);
636 /* Calls open(), passing FILENAME and FLAGS, repeating as necessary
637 to deal with interrupted calls. */
639 safe_open (const char *filename, int flags)
645 fd = open (filename, flags);
647 while (fd == -1 && errno == EINTR);
652 /* Calls close(), passing FD, repeating as necessary to deal with
653 interrupted calls. */
654 static int safe_close (int fd)
662 while (retval == -1 && errno == EINTR);
667 /* Calls read(), passing FD, BUFFER, and SIZE, repeating as
668 necessary to deal with interrupted calls. */
670 full_read (int fd, void *buffer_, size_t size)
672 char *buffer = buffer_;
673 size_t bytes_read = 0;
675 while (bytes_read < size)
677 int retval = read (fd, buffer + bytes_read, size - bytes_read);
679 bytes_read += retval;
680 else if (retval == 0)
682 else if (errno != EINTR)
689 /* Calls write(), passing FD, BUFFER, and SIZE, repeating as
690 necessary to deal with interrupted calls. */
692 full_write (int fd, const void *buffer_, size_t size)
694 const char *buffer = buffer_;
695 size_t bytes_written = 0;
697 while (bytes_written < size)
699 int retval = write (fd, buffer + bytes_written, size - bytes_written);
701 bytes_written += retval;
702 else if (errno != EINTR)
706 return bytes_written;
710 /* Registers our exit handler with atexit() if it has not already
713 register_atexit (void)
715 static int registered = 0;
719 atexit (exit_handler);
725 /* atexit() handler that closes and deletes our temporary
730 while (casefiles != NULL)
731 casefile_destroy (casefiles);
734 #include <gsl/gsl_rng.h>
739 static void test_casefile (int pattern, size_t value_cnt, size_t case_cnt);
740 static void get_random_case (struct ccase *, size_t value_cnt,
742 static void write_random_case (struct casefile *cf, size_t case_idx);
743 static void read_and_verify_random_case (struct casefile *cf,
744 struct casereader *reader,
746 static void fail_test (const char *message, ...);
749 cmd_debug_casefile (void)
751 static const size_t sizes[] =
753 1, 2, 3, 4, 5, 6, 7, 14, 15, 16, 17, 31, 55, 73,
754 100, 137, 257, 521, 1031, 2053
760 size_max = sizeof sizes / sizeof *sizes;
761 if (lex_match_id ("SMALL"))
769 return lex_end_of_command ();
771 for (pattern = 0; pattern < 6; pattern++)
775 for (size = sizes; size < sizes + size_max; size++)
779 for (case_cnt = 0; case_cnt <= case_max;
780 case_cnt = (case_cnt * 2) + 1)
781 test_casefile (pattern, *size, case_cnt);
784 printf ("Casefile tests succeeded.\n");
789 test_casefile (int pattern, size_t value_cnt, size_t case_cnt)
792 struct casereader *r1, *r2;
797 rng = gsl_rng_alloc (gsl_rng_mt19937);
798 cf = casefile_create (value_cnt);
800 casefile_to_disk (cf);
801 for (i = 0; i < case_cnt; i++)
802 write_random_case (cf, i);
805 r1 = casefile_get_reader (cf);
806 r2 = casefile_get_reader (cf);
811 for (i = 0; i < case_cnt; i++)
813 read_and_verify_random_case (cf, r1, i);
814 read_and_verify_random_case (cf, r2, i);
818 for (i = 0; i < case_cnt; i++)
819 read_and_verify_random_case (cf, r1, i);
820 for (i = 0; i < case_cnt; i++)
821 read_and_verify_random_case (cf, r2, i);
826 for (i = j = 0; i < case_cnt; i++)
828 read_and_verify_random_case (cf, r1, i);
829 if (gsl_rng_get (rng) % pattern == 0)
830 read_and_verify_random_case (cf, r2, j++);
831 if (i == case_cnt / 2)
832 casefile_to_disk (cf);
834 for (; j < case_cnt; j++)
835 read_and_verify_random_case (cf, r2, j);
838 if (casereader_read (r1, &c))
839 fail_test ("Casereader 1 not at end of file.");
840 if (casereader_read (r2, &c))
841 fail_test ("Casereader 2 not at end of file.");
843 casereader_destroy (r1);
845 casereader_destroy (r2);
848 r1 = casefile_get_destructive_reader (cf);
849 for (i = 0; i < case_cnt; i++)
851 struct ccase read_case, expected_case;
853 get_random_case (&expected_case, value_cnt, i);
854 if (!casereader_read_xfer (r1, &read_case))
855 fail_test ("Premature end of casefile.");
856 for (j = 0; j < value_cnt; j++)
858 double a = case_num (&read_case, j);
859 double b = case_num (&expected_case, j);
861 fail_test ("Case %lu fails comparison.", (unsigned long) i);
863 case_destroy (&expected_case);
864 case_destroy (&read_case);
866 casereader_destroy (r1);
868 casefile_destroy (cf);
873 get_random_case (struct ccase *c, size_t value_cnt, size_t case_idx)
876 case_create (c, value_cnt);
877 for (i = 0; i < value_cnt; i++)
878 case_data_rw (c, i)->f = case_idx % 257 + i;
882 write_random_case (struct casefile *cf, size_t case_idx)
885 get_random_case (&c, casefile_get_value_cnt (cf), case_idx);
886 casefile_append_xfer (cf, &c);
890 read_and_verify_random_case (struct casefile *cf,
891 struct casereader *reader, size_t case_idx)
893 struct ccase read_case, expected_case;
897 value_cnt = casefile_get_value_cnt (cf);
898 get_random_case (&expected_case, value_cnt, case_idx);
899 if (!casereader_read (reader, &read_case))
900 fail_test ("Premature end of casefile.");
901 for (i = 0; i < value_cnt; i++)
903 double a = case_num (&read_case, i);
904 double b = case_num (&expected_case, i);
906 fail_test ("Case %lu fails comparison.", (unsigned long) case_idx);
908 case_destroy (&read_case);
909 case_destroy (&expected_case);
913 fail_test (const char *message, ...)
917 va_start (args, message);
918 vprintf (message, args);