1 /* PSPP - computes sample statistics.
2 Copyright (C) 2004 Free Software Foundation, Inc.
3 Written by Ben Pfaff <blp@gnu.org>.
5 This program is free software; you can redistribute it and/or
6 modify it under the terms of the GNU General Public License as
7 published by the Free Software Foundation; either version 2 of the
8 License, or (at your option) any later version.
10 This program is distributed in the hope that it will be useful, but
11 WITHOUT ANY WARRANTY; without even the implied warranty of
12 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
13 General Public License for more details.
15 You should have received a copy of the GNU General Public License
16 along with this program; if not, write to the Free Software
17 Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA
36 #ifdef HAVE_VALGRIND_VALGRIND_H
37 #include <valgrind/valgrind.h>
40 #define IO_BUF_SIZE (8192 / sizeof (union value))
42 /* A casefile is a sequentially accessible array of immutable
43 cases. It may be stored in memory or on disk as workspace
44 allows. Cases may be appended to the end of the file. Cases
45 may be read sequentially starting from the beginning of the
46 file. Once any cases have been read, no more cases may be
47 appended. The entire file is discarded at once. */
49 /* In-memory cases are arranged in an array of arrays. The top
50 level is variable size and the size of each bottom level array
51 is fixed at the number of cases defined here. */
52 #define CASES_PER_BLOCK 128
58 struct casefile *next, *prev; /* Next, prev in global list. */
59 size_t value_cnt; /* Case size in `union value's. */
60 size_t case_acct_size; /* Case size for accounting. */
61 unsigned long case_cnt; /* Number of cases stored. */
62 enum { MEMORY, DISK } storage; /* Where cases are stored. */
63 enum { WRITE, READ } mode; /* Is writing or reading allowed? */
64 struct casereader *readers; /* List of our readers. */
65 int being_destroyed; /* Does a destructive reader exist? */
68 struct ccase **cases; /* Pointer to array of cases. */
71 int fd; /* File descriptor, -1 if none. */
72 char *filename; /* Filename. */
73 union value *buffer; /* I/O buffer, NULL if none. */
74 size_t buffer_used; /* Number of values used in buffer. */
75 size_t buffer_size; /* Buffer size in values. */
78 /* For reading out the cases in a casefile. */
81 struct casereader *next, *prev; /* Next, prev in casefile's list. */
82 struct casefile *cf; /* Our casefile. */
83 unsigned long case_idx; /* Case number of current case. */
84 int destructive; /* Is this a destructive reader? */
87 int fd; /* File descriptor. */
88 union value *buffer; /* I/O buffer. */
89 size_t buffer_pos; /* Offset of buffer position. */
90 struct ccase c; /* Current case. */
93 /* Return the case number of the current case */
95 casereader_cnum(const struct casereader *r)
100 /* Doubly linked list of all casefiles. */
101 static struct casefile *casefiles;
103 /* Number of bytes of case allocated in in-memory casefiles. */
104 static size_t case_bytes;
106 static void register_atexit (void);
107 static void exit_handler (void);
109 static void reader_open_file (struct casereader *reader);
110 static void write_case_to_disk (struct casefile *cf, const struct ccase *c);
111 static void flush_buffer (struct casefile *cf);
112 static void fill_buffer (struct casereader *reader);
114 static int safe_open (const char *filename, int flags);
115 static int safe_close (int fd);
116 static int full_read (int fd, void *buffer, size_t size);
117 static int full_write (int fd, const void *buffer, size_t size);
119 /* Creates and returns a casefile to store cases of VALUE_CNT
120 `union value's each. */
122 casefile_create (size_t value_cnt)
124 struct casefile *cf = xmalloc (sizeof *cf);
125 cf->next = casefiles;
127 if (cf->next != NULL)
130 cf->value_cnt = value_cnt;
131 cf->case_acct_size = (cf->value_cnt + 4) * sizeof *cf->buffer;
133 cf->storage = MEMORY;
136 cf->being_destroyed = 0;
141 cf->buffer_size = ROUND_UP (cf->value_cnt, IO_BUF_SIZE);
142 if (cf->value_cnt > 0 && cf->buffer_size % cf->value_cnt > 64)
143 cf->buffer_size = cf->value_cnt;
149 /* Destroys casefile CF. */
151 casefile_destroy (struct casefile *cf)
155 if (cf->next != NULL)
156 cf->next->prev = cf->prev;
157 if (cf->prev != NULL)
158 cf->prev->next = cf->next;
160 casefiles = cf->next;
162 while (cf->readers != NULL)
163 casereader_destroy (cf->readers);
165 if (cf->cases != NULL)
167 size_t idx, block_cnt;
169 case_bytes -= cf->case_cnt * cf->case_acct_size;
170 for (idx = 0; idx < cf->case_cnt; idx++)
172 size_t block_idx = idx / CASES_PER_BLOCK;
173 size_t case_idx = idx % CASES_PER_BLOCK;
174 struct ccase *c = &cf->cases[block_idx][case_idx];
178 block_cnt = DIV_RND_UP (cf->case_cnt, CASES_PER_BLOCK);
179 for (idx = 0; idx < block_cnt; idx++)
180 free (cf->cases[idx]);
188 if (cf->filename != NULL && remove (cf->filename) == -1)
189 msg (ME, _("%s: Removing temporary file: %s."),
190 cf->filename, strerror (errno));
199 /* Returns nonzero only if casefile CF is stored in memory (instead of on
202 casefile_in_core (const struct casefile *cf)
206 return cf->storage == MEMORY;
209 /* Puts a casefile to "sleep", that is, minimizes the resources
210 needed for it by closing its file descriptor and freeing its
211 buffer. This is useful if we need so many casefiles that we
212 might not have enough memory and file descriptors to go
215 For simplicity, this implementation always converts the
216 casefile to reader mode. If this turns out to be a problem,
217 with a little extra work we could also support sleeping
220 casefile_sleep (const struct casefile *cf_)
222 struct casefile *cf = (struct casefile *) cf_;
225 casefile_mode_reader (cf);
226 casefile_to_disk (cf);
233 if (cf->buffer != NULL)
240 /* Returns the number of `union value's in a case for CF. */
242 casefile_get_value_cnt (const struct casefile *cf)
246 return cf->value_cnt;
249 /* Returns the number of cases in casefile CF. */
251 casefile_get_case_cnt (const struct casefile *cf)
258 /* Appends a copy of case C to casefile CF. Not valid after any
259 reader for CF has been created. */
261 casefile_append (struct casefile *cf, const struct ccase *c)
265 assert (cf->mode == WRITE);
267 /* Try memory first. */
268 if (cf->storage == MEMORY)
270 if (case_bytes < get_max_workspace ())
272 size_t block_idx = cf->case_cnt / CASES_PER_BLOCK;
273 size_t case_idx = cf->case_cnt % CASES_PER_BLOCK;
274 struct ccase new_case;
276 case_bytes += cf->case_acct_size;
277 case_clone (&new_case, c);
280 if ((block_idx & (block_idx - 1)) == 0)
282 size_t block_cap = block_idx == 0 ? 1 : block_idx * 2;
283 cf->cases = xrealloc (cf->cases,
284 sizeof *cf->cases * block_cap);
287 cf->cases[block_idx] = xmalloc (sizeof **cf->cases
291 case_move (&cf->cases[block_idx][case_idx], &new_case);
295 casefile_to_disk (cf);
296 assert (cf->storage == DISK);
297 write_case_to_disk (cf, c);
301 write_case_to_disk (cf, c);
306 /* Appends case C to casefile CF, which takes over ownership of
307 C. Not valid after any reader for CF has been created. */
309 casefile_append_xfer (struct casefile *cf, struct ccase *c)
311 casefile_append (cf, c);
315 /* Writes case C to casefile CF's disk buffer, first flushing the buffer to
316 disk if it would otherwise overflow. */
318 write_case_to_disk (struct casefile *cf, const struct ccase *c)
320 case_to_values (c, cf->buffer + cf->buffer_used, cf->value_cnt);
321 cf->buffer_used += cf->value_cnt;
322 if (cf->buffer_used + cf->value_cnt > cf->buffer_size)
326 /* If any bytes in CF's output buffer are used, flush them to
329 flush_buffer (struct casefile *cf)
331 if (cf->buffer_used > 0)
333 if (!full_write (cf->fd, cf->buffer,
334 cf->buffer_size * sizeof *cf->buffer))
335 msg (FE, _("Error writing temporary file: %s."), strerror (errno));
341 /* Creates a temporary file and stores its name in *FILENAME and
342 a file descriptor for it in *FD. Returns success. Caller is
343 responsible for freeing *FILENAME. */
345 make_temp_file (int *fd, char **filename)
347 const char *parent_dir;
349 assert (filename != NULL);
352 if (getenv ("TMPDIR") != NULL)
353 parent_dir = getenv ("TMPDIR");
355 parent_dir = P_tmpdir;
357 *filename = xmalloc (strlen (parent_dir) + 32);
358 sprintf (*filename, "%s%cpsppXXXXXX", parent_dir, DIR_SEPARATOR);
359 *fd = mkstemp (*filename);
362 msg (FE, _("%s: Creating temporary file: %s."),
363 *filename, strerror (errno));
371 /* If CF is currently stored in memory, writes it to disk. Readers, if any,
372 retain their current positions. */
374 casefile_to_disk (const struct casefile *cf_)
376 struct casefile *cf = (struct casefile *) cf_;
377 struct casereader *reader;
381 if (cf->storage == MEMORY)
383 size_t idx, block_cnt;
385 assert (cf->filename == NULL);
386 assert (cf->fd == -1);
387 assert (cf->buffer_used == 0);
390 if (!make_temp_file (&cf->fd, &cf->filename))
392 cf->buffer = xmalloc (cf->buffer_size * sizeof *cf->buffer);
393 memset (cf->buffer, 0, cf->buffer_size * sizeof *cf->buffer);
395 case_bytes -= cf->case_cnt * cf->case_acct_size;
396 for (idx = 0; idx < cf->case_cnt; idx++)
398 size_t block_idx = idx / CASES_PER_BLOCK;
399 size_t case_idx = idx % CASES_PER_BLOCK;
400 struct ccase *c = &cf->cases[block_idx][case_idx];
401 write_case_to_disk (cf, c);
405 block_cnt = DIV_RND_UP (cf->case_cnt, CASES_PER_BLOCK);
406 for (idx = 0; idx < block_cnt; idx++)
407 free (cf->cases[idx]);
412 if (cf->mode == READ)
415 for (reader = cf->readers; reader != NULL; reader = reader->next)
416 reader_open_file (reader);
420 /* Changes CF to reader mode, ensuring that no more cases may be
421 added. Creating a casereader for CF has the same effect. */
423 casefile_mode_reader (struct casefile *cf)
429 /* Creates and returns a casereader for CF. A casereader can be used to
430 sequentially read the cases in a casefile. */
432 casefile_get_reader (const struct casefile *cf_)
434 struct casefile *cf = (struct casefile *) cf_;
435 struct casereader *reader;
438 assert (!cf->being_destroyed);
440 /* Flush the buffer to disk if it's not empty. */
441 if (cf->mode == WRITE && cf->storage == DISK)
446 reader = xmalloc (sizeof *reader);
448 reader->next = cf->readers;
449 if (cf->readers != NULL)
450 reader->next->prev = reader;
452 cf->readers = reader;
453 reader->case_idx = 0;
455 reader->buffer = NULL;
456 reader->buffer_pos = 0;
457 case_nullify (&reader->c);
459 if (reader->cf->storage == DISK)
460 reader_open_file (reader);
465 /* Creates and returns a destructive casereader for CF. Like a
466 normal casereader, a destructive casereader sequentially reads
467 the cases in a casefile. Unlike a normal casereader, a
468 destructive reader cannot operate concurrently with any other
469 reader. (This restriction could be relaxed in a few ways, but
470 it is so far unnecessary for other code.) */
472 casefile_get_destructive_reader (struct casefile *cf)
474 struct casereader *reader;
476 assert (cf->readers == NULL);
477 reader = casefile_get_reader (cf);
478 reader->destructive = 1;
479 cf->being_destroyed = 1;
483 /* Opens a disk file for READER and seeks to the current position as indicated
484 by case_idx. Normally the current position is the beginning of the file,
485 but casefile_to_disk may cause the file to be opened at a different
488 reader_open_file (struct casereader *reader)
490 struct casefile *cf = reader->cf;
493 if (reader->case_idx >= cf->case_cnt)
503 reader->fd = safe_open (cf->filename, O_RDONLY);
505 msg (FE, _("%s: Opening temporary file: %s."),
506 cf->filename, strerror (errno));
509 if (cf->buffer != NULL)
511 reader->buffer = cf->buffer;
516 reader->buffer = xmalloc (cf->buffer_size * sizeof *cf->buffer);
517 memset (reader->buffer, 0, cf->buffer_size * sizeof *cf->buffer);
520 if (cf->value_cnt != 0)
522 size_t buffer_case_cnt = cf->buffer_size / cf->value_cnt;
523 file_ofs = ((off_t) reader->case_idx / buffer_case_cnt
524 * cf->buffer_size * sizeof *cf->buffer);
525 reader->buffer_pos = (reader->case_idx % buffer_case_cnt
530 if (lseek (reader->fd, file_ofs, SEEK_SET) != file_ofs)
531 msg (FE, _("%s: Seeking temporary file: %s."),
532 cf->filename, strerror (errno));
534 if (cf->case_cnt > 0 && cf->value_cnt > 0)
535 fill_buffer (reader);
537 case_create (&reader->c, cf->value_cnt);
540 /* Fills READER's buffer by reading a block from disk. */
542 fill_buffer (struct casereader *reader)
544 int retval = full_read (reader->fd, reader->buffer,
545 reader->cf->buffer_size * sizeof *reader->buffer);
547 msg (FE, _("%s: Reading temporary file: %s."),
548 reader->cf->filename, strerror (errno));
549 else if (retval != reader->cf->buffer_size * sizeof *reader->buffer)
550 msg (FE, _("%s: Temporary file ended unexpectedly."),
551 reader->cf->filename);
554 /* Returns the casefile that READER reads. */
555 const struct casefile *
556 casereader_get_casefile (const struct casereader *reader)
558 assert (reader != NULL);
563 /* Reads a copy of the next case from READER into C.
564 Caller is responsible for destroying C. */
566 casereader_read (struct casereader *reader, struct ccase *c)
568 assert (reader != NULL);
570 if (reader->case_idx >= reader->cf->case_cnt)
573 if (reader->cf->storage == MEMORY)
575 size_t block_idx = reader->case_idx / CASES_PER_BLOCK;
576 size_t case_idx = reader->case_idx % CASES_PER_BLOCK;
578 case_clone (c, &reader->cf->cases[block_idx][case_idx]);
584 if (reader->buffer_pos + reader->cf->value_cnt > reader->cf->buffer_size)
586 fill_buffer (reader);
587 reader->buffer_pos = 0;
590 case_from_values (&reader->c, reader->buffer + reader->buffer_pos,
591 reader->cf->value_cnt);
592 reader->buffer_pos += reader->cf->value_cnt;
595 case_clone (c, &reader->c);
600 /* Reads the next case from READER into C and transfers ownership
601 to the caller. Caller is responsible for destroying C. */
603 casereader_read_xfer (struct casereader *reader, struct ccase *c)
605 assert (reader != NULL);
607 if (reader->destructive == 0
608 || reader->case_idx >= reader->cf->case_cnt
609 || reader->cf->storage == DISK)
610 return casereader_read (reader, c);
613 size_t block_idx = reader->case_idx / CASES_PER_BLOCK;
614 size_t case_idx = reader->case_idx % CASES_PER_BLOCK;
615 struct ccase *read_case = &reader->cf->cases[block_idx][case_idx];
617 case_move (c, read_case);
623 /* Destroys READER. */
625 casereader_destroy (struct casereader *reader)
627 assert (reader != NULL);
629 if (reader->next != NULL)
630 reader->next->prev = reader->prev;
631 if (reader->prev != NULL)
632 reader->prev->next = reader->next;
633 if (reader->cf->readers == reader)
634 reader->cf->readers = reader->next;
636 if (reader->cf->buffer == NULL)
637 reader->cf->buffer = reader->buffer;
639 free (reader->buffer);
641 if (reader->fd != -1)
643 if (reader->cf->fd == -1)
644 reader->cf->fd = reader->fd;
646 safe_close (reader->fd);
649 case_destroy (&reader->c);
654 /* Calls open(), passing FILENAME and FLAGS, repeating as necessary
655 to deal with interrupted calls. */
657 safe_open (const char *filename, int flags)
663 fd = open (filename, flags);
665 while (fd == -1 && errno == EINTR);
670 /* Calls close(), passing FD, repeating as necessary to deal with
671 interrupted calls. */
672 static int safe_close (int fd)
680 while (retval == -1 && errno == EINTR);
685 /* Calls read(), passing FD, BUFFER, and SIZE, repeating as
686 necessary to deal with interrupted calls. */
688 full_read (int fd, void *buffer_, size_t size)
690 char *buffer = buffer_;
691 size_t bytes_read = 0;
693 while (bytes_read < size)
695 int retval = read (fd, buffer + bytes_read, size - bytes_read);
697 bytes_read += retval;
698 else if (retval == 0)
700 else if (errno != EINTR)
707 /* Calls write(), passing FD, BUFFER, and SIZE, repeating as
708 necessary to deal with interrupted calls. */
710 full_write (int fd, const void *buffer_, size_t size)
712 const char *buffer = buffer_;
713 size_t bytes_written = 0;
715 while (bytes_written < size)
717 int retval = write (fd, buffer + bytes_written, size - bytes_written);
719 bytes_written += retval;
720 else if (errno != EINTR)
724 return bytes_written;
727 /* Registers our exit handler with atexit() if it has not already
730 register_atexit (void)
732 static int registered = 0;
736 atexit (exit_handler);
740 /* atexit() handler that closes and deletes our temporary
745 while (casefiles != NULL)
746 casefile_destroy (casefiles);
749 #include <gsl/gsl_rng.h>
754 static void test_casefile (int pattern, size_t value_cnt, size_t case_cnt);
755 static void get_random_case (struct ccase *, size_t value_cnt,
757 static void write_random_case (struct casefile *cf, size_t case_idx);
758 static void read_and_verify_random_case (struct casefile *cf,
759 struct casereader *reader,
761 static void fail_test (const char *message, ...);
764 cmd_debug_casefile (void)
766 static const size_t sizes[] =
768 1, 2, 3, 4, 5, 6, 7, 14, 15, 16, 17, 31, 55, 73,
769 100, 137, 257, 521, 1031, 2053
775 size_max = sizeof sizes / sizeof *sizes;
776 if (lex_match_id ("SMALL"))
784 return lex_end_of_command ();
786 for (pattern = 0; pattern < 5; pattern++)
790 for (size = sizes; size < sizes + size_max; size++)
794 for (case_cnt = 0; case_cnt <= case_max;
795 case_cnt = (case_cnt * 2) + 1)
796 test_casefile (pattern, *size, case_cnt);
799 printf ("Casefile tests succeeded.\n");
804 test_casefile (int pattern, size_t value_cnt, size_t case_cnt)
807 struct casereader *r1, *r2;
812 rng = gsl_rng_alloc (gsl_rng_mt19937);
813 cf = casefile_create (value_cnt);
814 for (i = 0; i < case_cnt; i++)
815 write_random_case (cf, i);
816 r1 = casefile_get_reader (cf);
817 r2 = casefile_get_reader (cf);
821 for (i = 0; i < case_cnt; i++)
823 read_and_verify_random_case (cf, r1, i);
824 read_and_verify_random_case (cf, r2, i);
828 for (i = 0; i < case_cnt; i++)
829 read_and_verify_random_case (cf, r1, i);
830 for (i = 0; i < case_cnt; i++)
831 read_and_verify_random_case (cf, r2, i);
836 for (i = j = 0; i < case_cnt; i++)
838 read_and_verify_random_case (cf, r1, i);
839 if (gsl_rng_get (rng) % pattern == 0)
840 read_and_verify_random_case (cf, r2, j++);
841 if (i == case_cnt / 2)
842 casefile_to_disk (cf);
844 for (; j < case_cnt; j++)
845 read_and_verify_random_case (cf, r2, j);
848 if (casereader_read (r1, &c))
849 fail_test ("Casereader 1 not at end of file.");
850 if (casereader_read (r2, &c))
851 fail_test ("Casereader 2 not at end of file.");
853 casereader_destroy (r1);
855 casereader_destroy (r2);
858 r1 = casefile_get_destructive_reader (cf);
859 for (i = 0; i < case_cnt; i++)
861 struct ccase read_case, expected_case;
863 get_random_case (&expected_case, value_cnt, i);
864 if (!casereader_read_xfer (r1, &read_case))
865 fail_test ("Premature end of casefile.");
866 for (j = 0; j < value_cnt; j++)
868 double a = case_num (&read_case, j);
869 double b = case_num (&expected_case, j);
871 fail_test ("Case %lu fails comparison.", (unsigned long) i);
873 case_destroy (&expected_case);
874 case_destroy (&read_case);
876 casereader_destroy (r1);
878 casefile_destroy (cf);
883 get_random_case (struct ccase *c, size_t value_cnt, size_t case_idx)
886 case_create (c, value_cnt);
887 for (i = 0; i < value_cnt; i++)
888 case_data_rw (c, i)->f = case_idx % 257 + i;
892 write_random_case (struct casefile *cf, size_t case_idx)
895 get_random_case (&c, casefile_get_value_cnt (cf), case_idx);
896 casefile_append_xfer (cf, &c);
900 read_and_verify_random_case (struct casefile *cf,
901 struct casereader *reader, size_t case_idx)
903 struct ccase read_case, expected_case;
907 value_cnt = casefile_get_value_cnt (cf);
908 get_random_case (&expected_case, value_cnt, case_idx);
909 if (!casereader_read (reader, &read_case))
910 fail_test ("Premature end of casefile.");
911 for (i = 0; i < value_cnt; i++)
913 double a = case_num (&read_case, i);
914 double b = case_num (&expected_case, i);
916 fail_test ("Case %lu fails comparison.", (unsigned long) case_idx);
918 case_destroy (&read_case);
919 case_destroy (&expected_case);
923 fail_test (const char *message, ...)
927 va_start (args, message);
928 vprintf (message, args);