1 /* PSPP - computes sample statistics.
2 Copyright (C) 2004 Free Software Foundation, Inc.
3 Written by Ben Pfaff <blp@gnu.org>.
5 This program is free software; you can redistribute it and/or
6 modify it under the terms of the GNU General Public License as
7 published by the Free Software Foundation; either version 2 of the
8 License, or (at your option) any later version.
10 This program is distributed in the hope that it will be useful, but
11 WITHOUT ANY WARRANTY; without even the implied warranty of
12 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
13 General Public License for more details.
15 You should have received a copy of the GNU General Public License
16 along with this program; if not, write to the Free Software
17 Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA
36 #ifdef HAVE_VALGRIND_VALGRIND_H
37 #include <valgrind/valgrind.h>
40 #define IO_BUF_SIZE 8192
42 /* A casefile is a sequentially accessible array of immutable
43 cases. It may be stored in memory or on disk as workspace
44 allows. Cases may be appended to the end of the file. Cases
45 may be read sequentially starting from the beginning of the
46 file. Once any cases have been read, no more cases may be
47 appended. The entire file is discarded at once. */
49 /* In-memory cases are arranged in an array of arrays. The top
50 level is variable size and the size of each bottom level array
51 is fixed at the number of cases defined here. */
52 #define CASES_PER_BLOCK 128
58 struct casefile *next, *prev; /* Next, prev in global list. */
59 size_t value_cnt; /* Case size in `union value's. */
60 size_t case_size; /* Case size in bytes. */
61 size_t case_acct_size; /* Case size for accounting. */
62 unsigned long case_cnt; /* Number of cases stored. */
63 enum { MEMORY, DISK } storage; /* Where cases are stored. */
64 enum { WRITE, READ } mode; /* Is writing or reading allowed? */
65 struct casereader *readers; /* List of our readers. */
66 int being_destroyed; /* Does a destructive reader exist? */
69 struct ccase **cases; /* Pointer to array of cases. */
72 int fd; /* File descriptor, -1 if none. */
73 char *filename; /* Filename. */
74 unsigned char *buffer; /* I/O buffer, NULL if none. */
75 size_t buffer_used; /* Number of bytes used in buffer. */
76 size_t buffer_size; /* Buffer size in bytes. */
79 /* For reading out the cases in a casefile. */
82 struct casereader *next, *prev; /* Next, prev in casefile's list. */
83 struct casefile *cf; /* Our casefile. */
84 unsigned long case_idx; /* Case number of current case. */
85 int destructive; /* Is this a destructive reader? */
88 int fd; /* File descriptor. */
89 unsigned char *buffer; /* I/O buffer. */
90 size_t buffer_pos; /* Byte offset of buffer position. */
91 struct ccase c; /* Current case. */
94 /* Doubly linked list of all casefiles. */
95 static struct casefile *casefiles;
97 /* Number of bytes of case allocated in in-memory casefiles. */
98 static size_t case_bytes;
100 static void register_atexit (void);
101 static void exit_handler (void);
103 static void reader_open_file (struct casereader *reader);
104 static void write_case_to_disk (struct casefile *cf, const struct ccase *c);
105 static void flush_buffer (struct casefile *cf);
106 static void fill_buffer (struct casereader *reader);
108 static int safe_open (const char *filename, int flags);
109 static int safe_close (int fd);
110 static int full_read (int fd, char *buffer, size_t size);
111 static int full_write (int fd, const char *buffer, size_t size);
113 /* Creates and returns a casefile to store cases of VALUE_CNT
114 `union value's each. */
116 casefile_create (size_t value_cnt)
118 struct casefile *cf = xmalloc (sizeof *cf);
119 cf->next = casefiles;
121 if (cf->next != NULL)
124 cf->value_cnt = value_cnt;
125 cf->case_size = case_serial_size (value_cnt);
126 cf->case_acct_size = cf->case_size + 4 * sizeof (void *);
128 cf->storage = MEMORY;
131 cf->being_destroyed = 0;
136 cf->buffer_size = ROUND_UP (cf->case_size, IO_BUF_SIZE);
137 if (cf->case_size > 0 && cf->buffer_size % cf->case_size > 512)
138 cf->buffer_size = cf->case_size;
144 /* Destroys casefile CF. */
146 casefile_destroy (struct casefile *cf)
150 if (cf->next != NULL)
151 cf->next->prev = cf->prev;
152 if (cf->prev != NULL)
153 cf->prev->next = cf->next;
155 casefiles = cf->next;
157 while (cf->readers != NULL)
158 casereader_destroy (cf->readers);
160 if (cf->cases != NULL)
162 size_t idx, block_cnt;
164 case_bytes -= cf->case_cnt * cf->case_acct_size;
165 for (idx = 0; idx < cf->case_cnt; idx++)
167 size_t block_idx = idx / CASES_PER_BLOCK;
168 size_t case_idx = idx % CASES_PER_BLOCK;
169 struct ccase *c = &cf->cases[block_idx][case_idx];
173 block_cnt = DIV_RND_UP (cf->case_cnt, CASES_PER_BLOCK);
174 for (idx = 0; idx < block_cnt; idx++)
175 free (cf->cases[idx]);
183 if (cf->filename != NULL && remove (cf->filename) == -1)
184 msg (ME, _("%s: Removing temporary file: %s."),
185 cf->filename, strerror (errno));
194 /* Returns nonzero only if casefile CF is stored in memory (instead of on
197 casefile_in_core (const struct casefile *cf)
201 return cf->storage == MEMORY;
204 /* Puts a casefile to "sleep", that is, minimizes the resources
205 needed for it by closing its file descriptor and freeing its
206 buffer. This is useful if we need so many casefiles that we
207 might not have enough memory and file descriptors to go
210 For simplicity, this implementation always converts the
211 casefile to reader mode. If this turns out to be a problem,
212 with a little extra work we could also support sleeping
215 casefile_sleep (const struct casefile *cf_)
217 struct casefile *cf = (struct casefile *) cf_;
220 casefile_mode_reader (cf);
221 casefile_to_disk (cf);
228 if (cf->buffer != NULL)
235 /* Returns the number of `union value's in a case for CF. */
237 casefile_get_value_cnt (const struct casefile *cf)
241 return cf->value_cnt;
244 /* Returns the number of cases in casefile CF. */
246 casefile_get_case_cnt (const struct casefile *cf)
253 /* Appends a copy of case C to casefile CF. Not valid after any
254 reader for CF has been created. */
256 casefile_append (struct casefile *cf, const struct ccase *c)
260 assert (cf->mode == WRITE);
262 /* Try memory first. */
263 if (cf->storage == MEMORY)
265 if (case_bytes < get_max_workspace ())
267 size_t block_idx = cf->case_cnt / CASES_PER_BLOCK;
268 size_t case_idx = cf->case_cnt % CASES_PER_BLOCK;
269 struct ccase new_case;
271 case_bytes += cf->case_acct_size;
272 case_clone (&new_case, c);
275 if ((block_idx & (block_idx - 1)) == 0)
277 size_t block_cap = block_idx == 0 ? 1 : block_idx * 2;
278 cf->cases = xrealloc (cf->cases,
279 sizeof *cf->cases * block_cap);
282 cf->cases[block_idx] = xmalloc (sizeof **cf->cases
286 case_move (&cf->cases[block_idx][case_idx], &new_case);
290 casefile_to_disk (cf);
291 assert (cf->storage == DISK);
292 write_case_to_disk (cf, c);
296 write_case_to_disk (cf, c);
301 /* Appends case C to casefile CF, which takes over ownership of
302 C. Not valid after any reader for CF has been created. */
304 casefile_append_xfer (struct casefile *cf, struct ccase *c)
306 casefile_append (cf, c);
310 /* Writes case C to casefile CF's disk buffer, first flushing the buffer to
311 disk if it would otherwise overflow. */
313 write_case_to_disk (struct casefile *cf, const struct ccase *c)
315 case_serialize (c, cf->buffer + cf->buffer_used, cf->case_size);
316 cf->buffer_used += cf->case_size;
317 if (cf->buffer_used + cf->case_size > cf->buffer_size)
321 /* If any bytes in CF's output buffer are used, flush them to
324 flush_buffer (struct casefile *cf)
326 if (cf->buffer_used > 0)
328 if (!full_write (cf->fd, cf->buffer, cf->buffer_size))
329 msg (FE, _("Error writing temporary file: %s."), strerror (errno));
335 /* Creates a temporary file and stores its name in *FILENAME and
336 a file descriptor for it in *FD. Returns success. Caller is
337 responsible for freeing *FILENAME. */
339 make_temp_file (int *fd, char **filename)
341 const char *parent_dir;
343 assert (filename != NULL);
346 if (getenv ("TMPDIR") != NULL)
347 parent_dir = getenv ("TMPDIR");
349 parent_dir = P_tmpdir;
351 *filename = xmalloc (strlen (parent_dir) + 32);
352 sprintf (*filename, "%s%cpsppXXXXXX", parent_dir, DIR_SEPARATOR);
353 *fd = mkstemp (*filename);
356 msg (FE, _("%s: Creating temporary file: %s."),
357 *filename, strerror (errno));
365 /* If CF is currently stored in memory, writes it to disk. Readers, if any,
366 retain their current positions. */
368 casefile_to_disk (const struct casefile *cf_)
370 struct casefile *cf = (struct casefile *) cf_;
371 struct casereader *reader;
375 if (cf->storage == MEMORY)
377 size_t idx, block_cnt;
379 assert (cf->filename == NULL);
380 assert (cf->fd == -1);
381 assert (cf->buffer_used == 0);
384 if (!make_temp_file (&cf->fd, &cf->filename))
386 cf->buffer = xmalloc (cf->buffer_size);
387 memset (cf->buffer, 0, cf->buffer_size);
389 case_bytes -= cf->case_cnt * cf->case_acct_size;
390 for (idx = 0; idx < cf->case_cnt; idx++)
392 size_t block_idx = idx / CASES_PER_BLOCK;
393 size_t case_idx = idx % CASES_PER_BLOCK;
394 struct ccase *c = &cf->cases[block_idx][case_idx];
395 write_case_to_disk (cf, c);
399 block_cnt = DIV_RND_UP (cf->case_cnt, CASES_PER_BLOCK);
400 for (idx = 0; idx < block_cnt; idx++)
401 free (cf->cases[idx]);
406 if (cf->mode == READ)
409 for (reader = cf->readers; reader != NULL; reader = reader->next)
410 reader_open_file (reader);
414 /* Changes CF to reader mode, ensuring that no more cases may be
415 added. Creating a casereader for CF has the same effect. */
417 casefile_mode_reader (struct casefile *cf)
423 /* Creates and returns a casereader for CF. A casereader can be used to
424 sequentially read the cases in a casefile. */
426 casefile_get_reader (const struct casefile *cf_)
428 struct casefile *cf = (struct casefile *) cf_;
429 struct casereader *reader;
432 assert (!cf->being_destroyed);
434 /* Flush the buffer to disk if it's not empty. */
435 if (cf->mode == WRITE && cf->storage == DISK)
440 reader = xmalloc (sizeof *reader);
442 reader->next = cf->readers;
443 if (cf->readers != NULL)
444 reader->next->prev = reader;
446 cf->readers = reader;
447 reader->case_idx = 0;
449 reader->buffer = NULL;
450 reader->buffer_pos = 0;
451 case_nullify (&reader->c);
453 if (reader->cf->storage == DISK)
454 reader_open_file (reader);
459 /* Creates and returns a destructive casereader for CF. Like a
460 normal casereader, a destructive casereader sequentially reads
461 the cases in a casefile. Unlike a normal casereader, a
462 destructive reader cannot operate concurrently with any other
463 reader. (This restriction could be relaxed in a few ways, but
464 it is so far unnecessary for other code.) */
466 casefile_get_destructive_reader (struct casefile *cf)
468 struct casereader *reader;
470 assert (cf->readers == NULL);
471 reader = casefile_get_reader (cf);
472 reader->destructive = 1;
473 cf->being_destroyed = 1;
477 /* Opens a disk file for READER and seeks to the current position as indicated
478 by case_idx. Normally the current position is the beginning of the file,
479 but casefile_to_disk may cause the file to be opened at a different
482 reader_open_file (struct casereader *reader)
484 struct casefile *cf = reader->cf;
485 size_t buffer_case_cnt;
488 if (reader->case_idx >= cf->case_cnt)
498 reader->fd = safe_open (cf->filename, O_RDONLY);
500 msg (FE, _("%s: Opening temporary file: %s."),
501 cf->filename, strerror (errno));
504 if (cf->buffer != NULL)
506 reader->buffer = cf->buffer;
511 reader->buffer = xmalloc (cf->buffer_size);
512 memset (reader->buffer, 0, cf->buffer_size);
515 if (cf->case_size != 0)
517 buffer_case_cnt = cf->buffer_size / cf->case_size;
518 file_ofs = ((off_t) reader->case_idx
519 / buffer_case_cnt * cf->buffer_size);
520 reader->buffer_pos = (reader->case_idx % buffer_case_cnt
525 if (lseek (reader->fd, file_ofs, SEEK_SET) != file_ofs)
526 msg (FE, _("%s: Seeking temporary file: %s."),
527 cf->filename, strerror (errno));
529 if (cf->case_cnt > 0 && cf->case_size > 0)
530 fill_buffer (reader);
532 case_create (&reader->c, cf->value_cnt);
535 /* Fills READER's buffer by reading a block from disk. */
537 fill_buffer (struct casereader *reader)
539 int retval = full_read (reader->fd, reader->buffer, reader->cf->buffer_size);
541 msg (FE, _("%s: Reading temporary file: %s."),
542 reader->cf->filename, strerror (errno));
543 else if (retval != reader->cf->buffer_size)
544 msg (FE, _("%s: Temporary file ended unexpectedly."),
545 reader->cf->filename);
548 /* Returns the casefile that READER reads. */
549 const struct casefile *
550 casereader_get_casefile (const struct casereader *reader)
552 assert (reader != NULL);
557 /* Reads a copy of the next case from READER into C.
558 Caller is responsible for destroying C. */
560 casereader_read (struct casereader *reader, struct ccase *c)
562 assert (reader != NULL);
564 if (reader->case_idx >= reader->cf->case_cnt)
567 if (reader->cf->storage == MEMORY)
569 size_t block_idx = reader->case_idx / CASES_PER_BLOCK;
570 size_t case_idx = reader->case_idx % CASES_PER_BLOCK;
572 case_clone (c, &reader->cf->cases[block_idx][case_idx]);
578 if (reader->buffer_pos + reader->cf->case_size > reader->cf->buffer_size)
580 fill_buffer (reader);
581 reader->buffer_pos = 0;
584 case_unserialize (&reader->c, reader->buffer + reader->buffer_pos,
585 reader->cf->case_size);
586 reader->buffer_pos += reader->cf->case_size;
589 case_clone (c, &reader->c);
594 /* Reads the next case from READER into C and transfers ownership
595 to the caller. Caller is responsible for destroying C. */
597 casereader_read_xfer (struct casereader *reader, struct ccase *c)
599 assert (reader != NULL);
601 if (reader->destructive == 0
602 || reader->case_idx >= reader->cf->case_cnt
603 || reader->cf->storage == DISK)
604 return casereader_read (reader, c);
607 size_t block_idx = reader->case_idx / CASES_PER_BLOCK;
608 size_t case_idx = reader->case_idx % CASES_PER_BLOCK;
609 struct ccase *read_case = &reader->cf->cases[block_idx][case_idx];
611 case_move (c, read_case);
617 /* Destroys READER. */
619 casereader_destroy (struct casereader *reader)
621 assert (reader != NULL);
623 if (reader->next != NULL)
624 reader->next->prev = reader->prev;
625 if (reader->prev != NULL)
626 reader->prev->next = reader->next;
627 if (reader->cf->readers == reader)
628 reader->cf->readers = reader->next;
630 if (reader->cf->buffer == NULL)
631 reader->cf->buffer = reader->buffer;
633 free (reader->buffer);
635 if (reader->fd != -1)
637 if (reader->cf->fd == -1)
638 reader->cf->fd = reader->fd;
640 safe_close (reader->fd);
643 case_destroy (&reader->c);
648 /* Calls open(), passing FILENAME and FLAGS, repeating as necessary
649 to deal with interrupted calls. */
651 safe_open (const char *filename, int flags)
657 fd = open (filename, flags);
659 while (fd == -1 && errno == EINTR);
664 /* Calls close(), passing FD, repeating as necessary to deal with
665 interrupted calls. */
666 static int safe_close (int fd)
674 while (retval == -1 && errno == EINTR);
679 /* Calls read(), passing FD, BUFFER, and SIZE, repeating as
680 necessary to deal with interrupted calls. */
682 full_read (int fd, char *buffer, size_t size)
684 size_t bytes_read = 0;
686 while (bytes_read < size)
688 int retval = read (fd, buffer + bytes_read, size - bytes_read);
690 bytes_read += retval;
691 else if (retval == 0)
693 else if (errno != EINTR)
700 /* Calls write(), passing FD, BUFFER, and SIZE, repeating as
701 necessary to deal with interrupted calls. */
703 full_write (int fd, const char *buffer, size_t size)
705 size_t bytes_written = 0;
707 while (bytes_written < size)
709 int retval = write (fd, buffer + bytes_written, size - bytes_written);
711 bytes_written += retval;
712 else if (errno != EINTR)
716 return bytes_written;
719 /* Registers our exit handler with atexit() if it has not already
722 register_atexit (void)
724 static int registered = 0;
728 atexit (exit_handler);
732 /* atexit() handler that closes and deletes our temporary
737 while (casefiles != NULL)
738 casefile_destroy (casefiles);
746 static void test_casefile (int pattern, size_t value_cnt, size_t case_cnt);
747 static void get_random_case (struct ccase *, size_t value_cnt,
749 static void write_random_case (struct casefile *cf, size_t case_idx);
750 static void read_and_verify_random_case (struct casefile *cf,
751 struct casereader *reader,
753 static void fail_test (const char *message, ...);
756 cmd_debug_casefile (void)
758 static const size_t sizes[] =
760 1, 2, 3, 4, 5, 6, 7, 14, 15, 16, 17, 31, 55, 73,
761 100, 137, 257, 521, 1031, 2053
767 size_max = sizeof sizes / sizeof *sizes;
768 if (lex_match_id ("SMALL"))
776 return lex_end_of_command ();
778 for (pattern = 0; pattern < 5; pattern++)
782 for (size = sizes; size < sizes + size_max; size++)
786 for (case_cnt = 0; case_cnt <= case_max;
787 case_cnt = (case_cnt * 2) + 1)
788 test_casefile (pattern, *size, case_cnt);
791 printf ("Casefile tests succeeded.\n");
796 test_casefile (int pattern, size_t value_cnt, size_t case_cnt)
800 struct casereader *r1, *r2;
806 rng_seed (rng, &zero, sizeof zero);
807 cf = casefile_create (value_cnt);
808 for (i = 0; i < case_cnt; i++)
809 write_random_case (cf, i);
810 r1 = casefile_get_reader (cf);
811 r2 = casefile_get_reader (cf);
815 for (i = 0; i < case_cnt; i++)
817 read_and_verify_random_case (cf, r1, i);
818 read_and_verify_random_case (cf, r2, i);
822 for (i = 0; i < case_cnt; i++)
823 read_and_verify_random_case (cf, r1, i);
824 for (i = 0; i < case_cnt; i++)
825 read_and_verify_random_case (cf, r2, i);
830 for (i = j = 0; i < case_cnt; i++)
832 read_and_verify_random_case (cf, r1, i);
833 if (rng_get_int (rng) % pattern == 0)
834 read_and_verify_random_case (cf, r2, j++);
835 if (i == case_cnt / 2)
836 casefile_to_disk (cf);
838 for (; j < case_cnt; j++)
839 read_and_verify_random_case (cf, r2, j);
842 if (casereader_read (r1, &c))
843 fail_test ("Casereader 1 not at end of file.");
844 if (casereader_read (r2, &c))
845 fail_test ("Casereader 2 not at end of file.");
847 casereader_destroy (r1);
849 casereader_destroy (r2);
852 r1 = casefile_get_destructive_reader (cf);
853 for (i = 0; i < case_cnt; i++)
855 struct ccase read_case, expected_case;
857 get_random_case (&expected_case, value_cnt, i);
858 if (!casereader_read_xfer (r1, &read_case))
859 fail_test ("Premature end of casefile.");
860 for (j = 0; j < value_cnt; j++)
862 double a = case_num (&read_case, j);
863 double b = case_num (&expected_case, j);
865 fail_test ("Case %lu fails comparison.", (unsigned long) i);
867 case_destroy (&expected_case);
868 case_destroy (&read_case);
870 casereader_destroy (r1);
872 casefile_destroy (cf);
877 get_random_case (struct ccase *c, size_t value_cnt, size_t case_idx)
880 case_create (c, value_cnt);
881 for (i = 0; i < value_cnt; i++)
882 case_data_rw (c, i)->f = case_idx % 257 + i;
886 write_random_case (struct casefile *cf, size_t case_idx)
889 get_random_case (&c, casefile_get_value_cnt (cf), case_idx);
890 casefile_append_xfer (cf, &c);
894 read_and_verify_random_case (struct casefile *cf,
895 struct casereader *reader, size_t case_idx)
897 struct ccase read_case, expected_case;
901 value_cnt = casefile_get_value_cnt (cf);
902 get_random_case (&expected_case, value_cnt, case_idx);
903 if (!casereader_read (reader, &read_case))
904 fail_test ("Premature end of casefile.");
905 for (i = 0; i < value_cnt; i++)
907 double a = case_num (&read_case, i);
908 double b = case_num (&expected_case, i);
910 fail_test ("Case %lu fails comparison.", (unsigned long) case_idx);
912 case_destroy (&read_case);
913 case_destroy (&expected_case);
917 fail_test (const char *message, ...)
921 va_start (args, message);
922 vprintf (message, args);