1 /* PSPP - computes sample statistics.
2 Copyright (C) 2004 Free Software Foundation, Inc.
3 Written by Ben Pfaff <blp@gnu.org>.
5 This program is free software; you can redistribute it and/or
6 modify it under the terms of the GNU General Public License as
7 published by the Free Software Foundation; either version 2 of the
8 License, or (at your option) any later version.
10 This program is distributed in the hope that it will be useful, but
11 WITHOUT ANY WARRANTY; without even the implied warranty of
12 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
13 General Public License for more details.
15 You should have received a copy of the GNU General Public License
16 along with this program; if not, write to the Free Software
17 Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA
36 #ifdef HAVE_VALGRIND_VALGRIND_H
37 #include <valgrind/valgrind.h>
40 #define IO_BUF_SIZE (8192 / sizeof (union value))
42 /* A casefile is a sequentially accessible array of immutable
43 cases. It may be stored in memory or on disk as workspace
44 allows. Cases may be appended to the end of the file. Cases
45 may be read sequentially starting from the beginning of the
46 file. Once any cases have been read, no more cases may be
47 appended. The entire file is discarded at once. */
49 /* In-memory cases are arranged in an array of arrays. The top
50 level is variable size and the size of each bottom level array
51 is fixed at the number of cases defined here. */
52 #define CASES_PER_BLOCK 128
58 struct casefile *next, *prev; /* Next, prev in global list. */
59 size_t value_cnt; /* Case size in `union value's. */
60 size_t case_acct_size; /* Case size for accounting. */
61 unsigned long case_cnt; /* Number of cases stored. */
62 enum { MEMORY, DISK } storage; /* Where cases are stored. */
63 enum { WRITE, READ } mode; /* Is writing or reading allowed? */
64 struct casereader *readers; /* List of our readers. */
65 int being_destroyed; /* Does a destructive reader exist? */
68 struct ccase **cases; /* Pointer to array of cases. */
71 int fd; /* File descriptor, -1 if none. */
72 char *filename; /* Filename. */
73 union value *buffer; /* I/O buffer, NULL if none. */
74 size_t buffer_used; /* Number of values used in buffer. */
75 size_t buffer_size; /* Buffer size in values. */
78 /* For reading out the cases in a casefile. */
81 struct casereader *next, *prev; /* Next, prev in casefile's list. */
82 struct casefile *cf; /* Our casefile. */
83 unsigned long case_idx; /* Case number of current case. */
84 int destructive; /* Is this a destructive reader? */
87 int fd; /* File descriptor. */
88 union value *buffer; /* I/O buffer. */
89 size_t buffer_pos; /* Offset of buffer position. */
90 struct ccase c; /* Current case. */
93 /* Return the case number of the current case */
95 casereader_cnum(const struct casereader *r)
100 /* Doubly linked list of all casefiles. */
101 static struct casefile *casefiles;
103 /* Number of bytes of case allocated in in-memory casefiles. */
104 static size_t case_bytes;
106 static void register_atexit (void);
107 static void exit_handler (void);
109 static void reader_open_file (struct casereader *reader);
110 static void write_case_to_disk (struct casefile *cf, const struct ccase *c);
111 static void flush_buffer (struct casefile *cf);
112 static void fill_buffer (struct casereader *reader);
114 static int safe_open (const char *filename, int flags);
115 static int safe_close (int fd);
116 static int full_read (int fd, void *buffer, size_t size);
117 static int full_write (int fd, const void *buffer, size_t size);
119 /* Creates and returns a casefile to store cases of VALUE_CNT
120 `union value's each. */
122 casefile_create (size_t value_cnt)
124 struct casefile *cf = xmalloc (sizeof *cf);
125 cf->next = casefiles;
127 if (cf->next != NULL)
130 cf->value_cnt = value_cnt;
131 cf->case_acct_size = (cf->value_cnt + 4) * sizeof *cf->buffer;
133 cf->storage = MEMORY;
136 cf->being_destroyed = 0;
141 cf->buffer_size = ROUND_UP (cf->value_cnt, IO_BUF_SIZE);
142 if (cf->value_cnt > 0 && cf->buffer_size % cf->value_cnt > 64)
143 cf->buffer_size = cf->value_cnt;
149 /* Destroys casefile CF. */
151 casefile_destroy (struct casefile *cf)
155 if (cf->next != NULL)
156 cf->next->prev = cf->prev;
157 if (cf->prev != NULL)
158 cf->prev->next = cf->next;
160 casefiles = cf->next;
162 while (cf->readers != NULL)
163 casereader_destroy (cf->readers);
165 if (cf->cases != NULL)
167 size_t idx, block_cnt;
169 case_bytes -= cf->case_cnt * cf->case_acct_size;
170 for (idx = 0; idx < cf->case_cnt; idx++)
172 size_t block_idx = idx / CASES_PER_BLOCK;
173 size_t case_idx = idx % CASES_PER_BLOCK;
174 struct ccase *c = &cf->cases[block_idx][case_idx];
178 block_cnt = DIV_RND_UP (cf->case_cnt, CASES_PER_BLOCK);
179 for (idx = 0; idx < block_cnt; idx++)
180 free (cf->cases[idx]);
188 if (cf->filename != NULL && remove (cf->filename) == -1)
189 msg (ME, _("%s: Removing temporary file: %s."),
190 cf->filename, strerror (errno));
199 /* Returns nonzero only if casefile CF is stored in memory (instead of on
202 casefile_in_core (const struct casefile *cf)
206 return cf->storage == MEMORY;
209 /* Puts a casefile to "sleep", that is, minimizes the resources
210 needed for it by closing its file descriptor and freeing its
211 buffer. This is useful if we need so many casefiles that we
212 might not have enough memory and file descriptors to go
215 For simplicity, this implementation always converts the
216 casefile to reader mode. If this turns out to be a problem,
217 with a little extra work we could also support sleeping
220 casefile_sleep (const struct casefile *cf_)
222 struct casefile *cf = (struct casefile *) cf_;
225 casefile_mode_reader (cf);
226 casefile_to_disk (cf);
234 if (cf->buffer != NULL)
241 /* Returns the number of `union value's in a case for CF. */
243 casefile_get_value_cnt (const struct casefile *cf)
247 return cf->value_cnt;
250 /* Returns the number of cases in casefile CF. */
252 casefile_get_case_cnt (const struct casefile *cf)
259 /* Appends a copy of case C to casefile CF. Not valid after any
260 reader for CF has been created. */
262 casefile_append (struct casefile *cf, const struct ccase *c)
266 assert (cf->mode == WRITE);
268 /* Try memory first. */
269 if (cf->storage == MEMORY)
271 if (case_bytes < get_max_workspace ())
273 size_t block_idx = cf->case_cnt / CASES_PER_BLOCK;
274 size_t case_idx = cf->case_cnt % CASES_PER_BLOCK;
275 struct ccase new_case;
277 case_bytes += cf->case_acct_size;
278 case_clone (&new_case, c);
281 if ((block_idx & (block_idx - 1)) == 0)
283 size_t block_cap = block_idx == 0 ? 1 : block_idx * 2;
284 cf->cases = xrealloc (cf->cases,
285 sizeof *cf->cases * block_cap);
288 cf->cases[block_idx] = xmalloc (sizeof **cf->cases
292 case_move (&cf->cases[block_idx][case_idx], &new_case);
296 casefile_to_disk (cf);
297 assert (cf->storage == DISK);
298 write_case_to_disk (cf, c);
302 write_case_to_disk (cf, c);
307 /* Appends case C to casefile CF, which takes over ownership of
308 C. Not valid after any reader for CF has been created. */
310 casefile_append_xfer (struct casefile *cf, struct ccase *c)
312 casefile_append (cf, c);
316 /* Writes case C to casefile CF's disk buffer, first flushing the buffer to
317 disk if it would otherwise overflow. */
319 write_case_to_disk (struct casefile *cf, const struct ccase *c)
321 case_to_values (c, cf->buffer + cf->buffer_used, cf->value_cnt);
322 cf->buffer_used += cf->value_cnt;
323 if (cf->buffer_used + cf->value_cnt > cf->buffer_size)
327 /* If any bytes in CF's output buffer are used, flush them to
330 flush_buffer (struct casefile *cf)
332 if (cf->buffer_used > 0)
334 if (!full_write (cf->fd, cf->buffer,
335 cf->buffer_size * sizeof *cf->buffer))
336 msg (FE, _("Error writing temporary file: %s."), strerror (errno));
342 /* Creates a temporary file and stores its name in *FILENAME and
343 a file descriptor for it in *FD. Returns success. Caller is
344 responsible for freeing *FILENAME. */
346 make_temp_file (int *fd, char **filename)
348 const char *parent_dir;
350 assert (filename != NULL);
353 if (getenv ("TMPDIR") != NULL)
354 parent_dir = getenv ("TMPDIR");
356 parent_dir = P_tmpdir;
358 *filename = xmalloc (strlen (parent_dir) + 32);
359 sprintf (*filename, "%s%cpsppXXXXXX", parent_dir, DIR_SEPARATOR);
360 *fd = mkstemp (*filename);
363 msg (FE, _("%s: Creating temporary file: %s."),
364 *filename, strerror (errno));
372 /* If CF is currently stored in memory, writes it to disk. Readers, if any,
373 retain their current positions. */
375 casefile_to_disk (const struct casefile *cf_)
377 struct casefile *cf = (struct casefile *) cf_;
378 struct casereader *reader;
382 if (cf->storage == MEMORY)
384 size_t idx, block_cnt;
386 assert (cf->filename == NULL);
387 assert (cf->fd == -1);
388 assert (cf->buffer_used == 0);
391 if (!make_temp_file (&cf->fd, &cf->filename))
393 cf->buffer = xmalloc (cf->buffer_size * sizeof *cf->buffer);
394 memset (cf->buffer, 0, cf->buffer_size * sizeof *cf->buffer);
396 case_bytes -= cf->case_cnt * cf->case_acct_size;
397 for (idx = 0; idx < cf->case_cnt; idx++)
399 size_t block_idx = idx / CASES_PER_BLOCK;
400 size_t case_idx = idx % CASES_PER_BLOCK;
401 struct ccase *c = &cf->cases[block_idx][case_idx];
402 write_case_to_disk (cf, c);
406 block_cnt = DIV_RND_UP (cf->case_cnt, CASES_PER_BLOCK);
407 for (idx = 0; idx < block_cnt; idx++)
408 free (cf->cases[idx]);
413 if (cf->mode == READ)
416 for (reader = cf->readers; reader != NULL; reader = reader->next)
417 reader_open_file (reader);
421 /* Changes CF to reader mode, ensuring that no more cases may be
422 added. Creating a casereader for CF has the same effect. */
424 casefile_mode_reader (struct casefile *cf)
430 /* Creates and returns a casereader for CF. A casereader can be used to
431 sequentially read the cases in a casefile. */
433 casefile_get_reader (const struct casefile *cf_)
435 struct casefile *cf = (struct casefile *) cf_;
436 struct casereader *reader;
439 assert (!cf->being_destroyed);
441 /* Flush the buffer to disk if it's not empty. */
442 if (cf->mode == WRITE && cf->storage == DISK)
447 reader = xmalloc (sizeof *reader);
448 reader->next = cf->readers;
449 if (cf->readers != NULL)
450 reader->next->prev = reader;
451 cf->readers = reader;
454 reader->case_idx = 0;
455 reader->destructive = 0;
457 reader->buffer = NULL;
458 reader->buffer_pos = 0;
459 case_nullify (&reader->c);
461 if (reader->cf->storage == DISK)
462 reader_open_file (reader);
467 /* Creates and returns a destructive casereader for CF. Like a
468 normal casereader, a destructive casereader sequentially reads
469 the cases in a casefile. Unlike a normal casereader, a
470 destructive reader cannot operate concurrently with any other
471 reader. (This restriction could be relaxed in a few ways, but
472 it is so far unnecessary for other code.) */
474 casefile_get_destructive_reader (struct casefile *cf)
476 struct casereader *reader;
478 assert (cf->readers == NULL);
479 reader = casefile_get_reader (cf);
480 reader->destructive = 1;
481 cf->being_destroyed = 1;
485 /* Opens a disk file for READER and seeks to the current position as indicated
486 by case_idx. Normally the current position is the beginning of the file,
487 but casefile_to_disk may cause the file to be opened at a different
490 reader_open_file (struct casereader *reader)
492 struct casefile *cf = reader->cf;
495 if (reader->case_idx >= cf->case_cnt)
505 reader->fd = safe_open (cf->filename, O_RDONLY);
507 msg (FE, _("%s: Opening temporary file: %s."),
508 cf->filename, strerror (errno));
511 if (cf->buffer != NULL)
513 reader->buffer = cf->buffer;
518 reader->buffer = xmalloc (cf->buffer_size * sizeof *cf->buffer);
519 memset (reader->buffer, 0, cf->buffer_size * sizeof *cf->buffer);
522 if (cf->value_cnt != 0)
524 size_t buffer_case_cnt = cf->buffer_size / cf->value_cnt;
525 file_ofs = ((off_t) reader->case_idx / buffer_case_cnt
526 * cf->buffer_size * sizeof *cf->buffer);
527 reader->buffer_pos = (reader->case_idx % buffer_case_cnt
532 if (lseek (reader->fd, file_ofs, SEEK_SET) != file_ofs)
533 msg (FE, _("%s: Seeking temporary file: %s."),
534 cf->filename, strerror (errno));
536 if (cf->case_cnt > 0 && cf->value_cnt > 0)
537 fill_buffer (reader);
539 case_create (&reader->c, cf->value_cnt);
542 /* Fills READER's buffer by reading a block from disk. */
544 fill_buffer (struct casereader *reader)
546 int retval = full_read (reader->fd, reader->buffer,
547 reader->cf->buffer_size * sizeof *reader->buffer);
549 msg (FE, _("%s: Reading temporary file: %s."),
550 reader->cf->filename, strerror (errno));
551 else if (retval != reader->cf->buffer_size * sizeof *reader->buffer)
552 msg (FE, _("%s: Temporary file ended unexpectedly."),
553 reader->cf->filename);
556 /* Returns the casefile that READER reads. */
557 const struct casefile *
558 casereader_get_casefile (const struct casereader *reader)
560 assert (reader != NULL);
565 /* Reads a copy of the next case from READER into C.
566 Caller is responsible for destroying C. */
568 casereader_read (struct casereader *reader, struct ccase *c)
570 assert (reader != NULL);
572 if (reader->case_idx >= reader->cf->case_cnt)
575 if (reader->cf->storage == MEMORY)
577 size_t block_idx = reader->case_idx / CASES_PER_BLOCK;
578 size_t case_idx = reader->case_idx % CASES_PER_BLOCK;
580 case_clone (c, &reader->cf->cases[block_idx][case_idx]);
586 if (reader->buffer_pos + reader->cf->value_cnt > reader->cf->buffer_size)
588 fill_buffer (reader);
589 reader->buffer_pos = 0;
592 case_from_values (&reader->c, reader->buffer + reader->buffer_pos,
593 reader->cf->value_cnt);
594 reader->buffer_pos += reader->cf->value_cnt;
597 case_clone (c, &reader->c);
602 /* Reads the next case from READER into C and transfers ownership
603 to the caller. Caller is responsible for destroying C. */
605 casereader_read_xfer (struct casereader *reader, struct ccase *c)
607 assert (reader != NULL);
609 if (reader->destructive == 0
610 || reader->case_idx >= reader->cf->case_cnt
611 || reader->cf->storage == DISK)
612 return casereader_read (reader, c);
615 size_t block_idx = reader->case_idx / CASES_PER_BLOCK;
616 size_t case_idx = reader->case_idx % CASES_PER_BLOCK;
617 struct ccase *read_case = &reader->cf->cases[block_idx][case_idx];
619 case_move (c, read_case);
625 /* Destroys READER. */
627 casereader_destroy (struct casereader *reader)
629 assert (reader != NULL);
631 if (reader->next != NULL)
632 reader->next->prev = reader->prev;
633 if (reader->prev != NULL)
634 reader->prev->next = reader->next;
635 if (reader->cf->readers == reader)
636 reader->cf->readers = reader->next;
638 if (reader->cf->buffer == NULL)
639 reader->cf->buffer = reader->buffer;
641 free (reader->buffer);
643 if (reader->fd != -1)
645 if (reader->cf->fd == -1)
646 reader->cf->fd = reader->fd;
648 safe_close (reader->fd);
651 case_destroy (&reader->c);
656 /* Calls open(), passing FILENAME and FLAGS, repeating as necessary
657 to deal with interrupted calls. */
659 safe_open (const char *filename, int flags)
665 fd = open (filename, flags);
667 while (fd == -1 && errno == EINTR);
672 /* Calls close(), passing FD, repeating as necessary to deal with
673 interrupted calls. */
674 static int safe_close (int fd)
682 while (retval == -1 && errno == EINTR);
687 /* Calls read(), passing FD, BUFFER, and SIZE, repeating as
688 necessary to deal with interrupted calls. */
690 full_read (int fd, void *buffer_, size_t size)
692 char *buffer = buffer_;
693 size_t bytes_read = 0;
695 while (bytes_read < size)
697 int retval = read (fd, buffer + bytes_read, size - bytes_read);
699 bytes_read += retval;
700 else if (retval == 0)
702 else if (errno != EINTR)
709 /* Calls write(), passing FD, BUFFER, and SIZE, repeating as
710 necessary to deal with interrupted calls. */
712 full_write (int fd, const void *buffer_, size_t size)
714 const char *buffer = buffer_;
715 size_t bytes_written = 0;
717 while (bytes_written < size)
719 int retval = write (fd, buffer + bytes_written, size - bytes_written);
721 bytes_written += retval;
722 else if (errno != EINTR)
726 return bytes_written;
729 /* Registers our exit handler with atexit() if it has not already
732 register_atexit (void)
734 static int registered = 0;
738 atexit (exit_handler);
742 /* atexit() handler that closes and deletes our temporary
747 while (casefiles != NULL)
748 casefile_destroy (casefiles);
751 #include <gsl/gsl_rng.h>
756 static void test_casefile (int pattern, size_t value_cnt, size_t case_cnt);
757 static void get_random_case (struct ccase *, size_t value_cnt,
759 static void write_random_case (struct casefile *cf, size_t case_idx);
760 static void read_and_verify_random_case (struct casefile *cf,
761 struct casereader *reader,
763 static void fail_test (const char *message, ...);
766 cmd_debug_casefile (void)
768 static const size_t sizes[] =
770 1, 2, 3, 4, 5, 6, 7, 14, 15, 16, 17, 31, 55, 73,
771 100, 137, 257, 521, 1031, 2053
777 size_max = sizeof sizes / sizeof *sizes;
778 if (lex_match_id ("SMALL"))
786 return lex_end_of_command ();
788 for (pattern = 0; pattern < 6; pattern++)
792 for (size = sizes; size < sizes + size_max; size++)
796 for (case_cnt = 0; case_cnt <= case_max;
797 case_cnt = (case_cnt * 2) + 1)
798 test_casefile (pattern, *size, case_cnt);
801 printf ("Casefile tests succeeded.\n");
806 test_casefile (int pattern, size_t value_cnt, size_t case_cnt)
809 struct casereader *r1, *r2;
814 rng = gsl_rng_alloc (gsl_rng_mt19937);
815 cf = casefile_create (value_cnt);
817 casefile_to_disk (cf);
818 for (i = 0; i < case_cnt; i++)
819 write_random_case (cf, i);
822 r1 = casefile_get_reader (cf);
823 r2 = casefile_get_reader (cf);
828 for (i = 0; i < case_cnt; i++)
830 read_and_verify_random_case (cf, r1, i);
831 read_and_verify_random_case (cf, r2, i);
835 for (i = 0; i < case_cnt; i++)
836 read_and_verify_random_case (cf, r1, i);
837 for (i = 0; i < case_cnt; i++)
838 read_and_verify_random_case (cf, r2, i);
843 for (i = j = 0; i < case_cnt; i++)
845 read_and_verify_random_case (cf, r1, i);
846 if (gsl_rng_get (rng) % pattern == 0)
847 read_and_verify_random_case (cf, r2, j++);
848 if (i == case_cnt / 2)
849 casefile_to_disk (cf);
851 for (; j < case_cnt; j++)
852 read_and_verify_random_case (cf, r2, j);
855 if (casereader_read (r1, &c))
856 fail_test ("Casereader 1 not at end of file.");
857 if (casereader_read (r2, &c))
858 fail_test ("Casereader 2 not at end of file.");
860 casereader_destroy (r1);
862 casereader_destroy (r2);
865 r1 = casefile_get_destructive_reader (cf);
866 for (i = 0; i < case_cnt; i++)
868 struct ccase read_case, expected_case;
870 get_random_case (&expected_case, value_cnt, i);
871 if (!casereader_read_xfer (r1, &read_case))
872 fail_test ("Premature end of casefile.");
873 for (j = 0; j < value_cnt; j++)
875 double a = case_num (&read_case, j);
876 double b = case_num (&expected_case, j);
878 fail_test ("Case %lu fails comparison.", (unsigned long) i);
880 case_destroy (&expected_case);
881 case_destroy (&read_case);
883 casereader_destroy (r1);
885 casefile_destroy (cf);
890 get_random_case (struct ccase *c, size_t value_cnt, size_t case_idx)
893 case_create (c, value_cnt);
894 for (i = 0; i < value_cnt; i++)
895 case_data_rw (c, i)->f = case_idx % 257 + i;
899 write_random_case (struct casefile *cf, size_t case_idx)
902 get_random_case (&c, casefile_get_value_cnt (cf), case_idx);
903 casefile_append_xfer (cf, &c);
907 read_and_verify_random_case (struct casefile *cf,
908 struct casereader *reader, size_t case_idx)
910 struct ccase read_case, expected_case;
914 value_cnt = casefile_get_value_cnt (cf);
915 get_random_case (&expected_case, value_cnt, case_idx);
916 if (!casereader_read (reader, &read_case))
917 fail_test ("Premature end of casefile.");
918 for (i = 0; i < value_cnt; i++)
920 double a = case_num (&read_case, i);
921 double b = case_num (&expected_case, i);
923 fail_test ("Case %lu fails comparison.", (unsigned long) case_idx);
925 case_destroy (&read_case);
926 case_destroy (&expected_case);
930 fail_test (const char *message, ...)
934 va_start (args, message);
935 vprintf (message, args);