1 /* PSPP - computes sample statistics.
2 Copyright (C) 2004 Free Software Foundation, Inc.
3 Written by Ben Pfaff <blp@gnu.org>.
5 This program is free software; you can redistribute it and/or
6 modify it under the terms of the GNU General Public License as
7 published by the Free Software Foundation; either version 2 of the
8 License, or (at your option) any later version.
10 This program is distributed in the hope that it will be useful, but
11 WITHOUT ANY WARRANTY; without even the implied warranty of
12 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
13 General Public License for more details.
15 You should have received a copy of the GNU General Public License
16 along with this program; if not, write to the Free Software
17 Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA
36 #ifdef HAVE_VALGRIND_VALGRIND_H
37 #include <valgrind/valgrind.h>
40 #define IO_BUF_SIZE (8192 / sizeof (union value))
42 /* A casefile is a sequentially accessible array of immutable
43 cases. It may be stored in memory or on disk as workspace
44 allows. Cases may be appended to the end of the file. Cases
45 may be read sequentially starting from the beginning of the
46 file. Once any cases have been read, no more cases may be
47 appended. The entire file is discarded at once. */
49 /* In-memory cases are arranged in an array of arrays. The top
50 level is variable size and the size of each bottom level array
51 is fixed at the number of cases defined here. */
52 #define CASES_PER_BLOCK 128
58 struct casefile *next, *prev; /* Next, prev in global list. */
59 size_t value_cnt; /* Case size in `union value's. */
60 size_t case_acct_size; /* Case size for accounting. */
61 unsigned long case_cnt; /* Number of cases stored. */
62 enum { MEMORY, DISK } storage; /* Where cases are stored. */
63 enum { WRITE, READ } mode; /* Is writing or reading allowed? */
64 struct casereader *readers; /* List of our readers. */
65 int being_destroyed; /* Does a destructive reader exist? */
68 struct ccase **cases; /* Pointer to array of cases. */
71 int fd; /* File descriptor, -1 if none. */
72 char *filename; /* Filename. */
73 union value *buffer; /* I/O buffer, NULL if none. */
74 size_t buffer_used; /* Number of values used in buffer. */
75 size_t buffer_size; /* Buffer size in values. */
78 /* For reading out the cases in a casefile. */
81 struct casereader *next, *prev; /* Next, prev in casefile's list. */
82 struct casefile *cf; /* Our casefile. */
83 unsigned long case_idx; /* Case number of current case. */
84 int destructive; /* Is this a destructive reader? */
87 int fd; /* File descriptor. */
88 union value *buffer; /* I/O buffer. */
89 size_t buffer_pos; /* Offset of buffer position. */
90 struct ccase c; /* Current case. */
93 /* Doubly linked list of all casefiles. */
94 static struct casefile *casefiles;
96 /* Number of bytes of case allocated in in-memory casefiles. */
97 static size_t case_bytes;
99 static void register_atexit (void);
100 static void exit_handler (void);
102 static void reader_open_file (struct casereader *reader);
103 static void write_case_to_disk (struct casefile *cf, const struct ccase *c);
104 static void flush_buffer (struct casefile *cf);
105 static void fill_buffer (struct casereader *reader);
107 static int safe_open (const char *filename, int flags);
108 static int safe_close (int fd);
109 static int full_read (int fd, void *buffer, size_t size);
110 static int full_write (int fd, const void *buffer, size_t size);
112 /* Creates and returns a casefile to store cases of VALUE_CNT
113 `union value's each. */
115 casefile_create (size_t value_cnt)
117 struct casefile *cf = xmalloc (sizeof *cf);
118 cf->next = casefiles;
120 if (cf->next != NULL)
123 cf->value_cnt = value_cnt;
124 cf->case_acct_size = (cf->value_cnt + 4) * sizeof *cf->buffer;
126 cf->storage = MEMORY;
129 cf->being_destroyed = 0;
134 cf->buffer_size = ROUND_UP (cf->value_cnt, IO_BUF_SIZE);
135 if (cf->value_cnt > 0 && cf->buffer_size % cf->value_cnt > 64)
136 cf->buffer_size = cf->value_cnt;
142 /* Destroys casefile CF. */
144 casefile_destroy (struct casefile *cf)
148 if (cf->next != NULL)
149 cf->next->prev = cf->prev;
150 if (cf->prev != NULL)
151 cf->prev->next = cf->next;
153 casefiles = cf->next;
155 while (cf->readers != NULL)
156 casereader_destroy (cf->readers);
158 if (cf->cases != NULL)
160 size_t idx, block_cnt;
162 case_bytes -= cf->case_cnt * cf->case_acct_size;
163 for (idx = 0; idx < cf->case_cnt; idx++)
165 size_t block_idx = idx / CASES_PER_BLOCK;
166 size_t case_idx = idx % CASES_PER_BLOCK;
167 struct ccase *c = &cf->cases[block_idx][case_idx];
171 block_cnt = DIV_RND_UP (cf->case_cnt, CASES_PER_BLOCK);
172 for (idx = 0; idx < block_cnt; idx++)
173 free (cf->cases[idx]);
181 if (cf->filename != NULL && remove (cf->filename) == -1)
182 msg (ME, _("%s: Removing temporary file: %s."),
183 cf->filename, strerror (errno));
192 /* Returns nonzero only if casefile CF is stored in memory (instead of on
195 casefile_in_core (const struct casefile *cf)
199 return cf->storage == MEMORY;
202 /* Puts a casefile to "sleep", that is, minimizes the resources
203 needed for it by closing its file descriptor and freeing its
204 buffer. This is useful if we need so many casefiles that we
205 might not have enough memory and file descriptors to go
208 For simplicity, this implementation always converts the
209 casefile to reader mode. If this turns out to be a problem,
210 with a little extra work we could also support sleeping
213 casefile_sleep (const struct casefile *cf_)
215 struct casefile *cf = (struct casefile *) cf_;
218 casefile_mode_reader (cf);
219 casefile_to_disk (cf);
226 if (cf->buffer != NULL)
233 /* Returns the number of `union value's in a case for CF. */
235 casefile_get_value_cnt (const struct casefile *cf)
239 return cf->value_cnt;
242 /* Returns the number of cases in casefile CF. */
244 casefile_get_case_cnt (const struct casefile *cf)
251 /* Appends a copy of case C to casefile CF. Not valid after any
252 reader for CF has been created. */
254 casefile_append (struct casefile *cf, const struct ccase *c)
258 assert (cf->mode == WRITE);
260 /* Try memory first. */
261 if (cf->storage == MEMORY)
263 if (case_bytes < get_max_workspace ())
265 size_t block_idx = cf->case_cnt / CASES_PER_BLOCK;
266 size_t case_idx = cf->case_cnt % CASES_PER_BLOCK;
267 struct ccase new_case;
269 case_bytes += cf->case_acct_size;
270 case_clone (&new_case, c);
273 if ((block_idx & (block_idx - 1)) == 0)
275 size_t block_cap = block_idx == 0 ? 1 : block_idx * 2;
276 cf->cases = xrealloc (cf->cases,
277 sizeof *cf->cases * block_cap);
280 cf->cases[block_idx] = xmalloc (sizeof **cf->cases
284 case_move (&cf->cases[block_idx][case_idx], &new_case);
288 casefile_to_disk (cf);
289 assert (cf->storage == DISK);
290 write_case_to_disk (cf, c);
294 write_case_to_disk (cf, c);
299 /* Appends case C to casefile CF, which takes over ownership of
300 C. Not valid after any reader for CF has been created. */
302 casefile_append_xfer (struct casefile *cf, struct ccase *c)
304 casefile_append (cf, c);
308 /* Writes case C to casefile CF's disk buffer, first flushing the buffer to
309 disk if it would otherwise overflow. */
311 write_case_to_disk (struct casefile *cf, const struct ccase *c)
313 case_to_values (c, cf->buffer + cf->buffer_used, cf->value_cnt);
314 cf->buffer_used += cf->value_cnt;
315 if (cf->buffer_used + cf->value_cnt > cf->buffer_size)
319 /* If any bytes in CF's output buffer are used, flush them to
322 flush_buffer (struct casefile *cf)
324 if (cf->buffer_used > 0)
326 if (!full_write (cf->fd, cf->buffer,
327 cf->buffer_size * sizeof *cf->buffer))
328 msg (FE, _("Error writing temporary file: %s."), strerror (errno));
334 /* Creates a temporary file and stores its name in *FILENAME and
335 a file descriptor for it in *FD. Returns success. Caller is
336 responsible for freeing *FILENAME. */
338 make_temp_file (int *fd, char **filename)
340 const char *parent_dir;
342 assert (filename != NULL);
345 if (getenv ("TMPDIR") != NULL)
346 parent_dir = getenv ("TMPDIR");
348 parent_dir = P_tmpdir;
350 *filename = xmalloc (strlen (parent_dir) + 32);
351 sprintf (*filename, "%s%cpsppXXXXXX", parent_dir, DIR_SEPARATOR);
352 *fd = mkstemp (*filename);
355 msg (FE, _("%s: Creating temporary file: %s."),
356 *filename, strerror (errno));
364 /* If CF is currently stored in memory, writes it to disk. Readers, if any,
365 retain their current positions. */
367 casefile_to_disk (const struct casefile *cf_)
369 struct casefile *cf = (struct casefile *) cf_;
370 struct casereader *reader;
374 if (cf->storage == MEMORY)
376 size_t idx, block_cnt;
378 assert (cf->filename == NULL);
379 assert (cf->fd == -1);
380 assert (cf->buffer_used == 0);
383 if (!make_temp_file (&cf->fd, &cf->filename))
385 cf->buffer = xmalloc (cf->buffer_size * sizeof *cf->buffer);
386 memset (cf->buffer, 0, cf->buffer_size * sizeof *cf->buffer);
388 case_bytes -= cf->case_cnt * cf->case_acct_size;
389 for (idx = 0; idx < cf->case_cnt; idx++)
391 size_t block_idx = idx / CASES_PER_BLOCK;
392 size_t case_idx = idx % CASES_PER_BLOCK;
393 struct ccase *c = &cf->cases[block_idx][case_idx];
394 write_case_to_disk (cf, c);
398 block_cnt = DIV_RND_UP (cf->case_cnt, CASES_PER_BLOCK);
399 for (idx = 0; idx < block_cnt; idx++)
400 free (cf->cases[idx]);
405 if (cf->mode == READ)
408 for (reader = cf->readers; reader != NULL; reader = reader->next)
409 reader_open_file (reader);
413 /* Changes CF to reader mode, ensuring that no more cases may be
414 added. Creating a casereader for CF has the same effect. */
416 casefile_mode_reader (struct casefile *cf)
422 /* Creates and returns a casereader for CF. A casereader can be used to
423 sequentially read the cases in a casefile. */
425 casefile_get_reader (const struct casefile *cf_)
427 struct casefile *cf = (struct casefile *) cf_;
428 struct casereader *reader;
431 assert (!cf->being_destroyed);
433 /* Flush the buffer to disk if it's not empty. */
434 if (cf->mode == WRITE && cf->storage == DISK)
439 reader = xmalloc (sizeof *reader);
441 reader->next = cf->readers;
442 if (cf->readers != NULL)
443 reader->next->prev = reader;
445 cf->readers = reader;
446 reader->case_idx = 0;
448 reader->buffer = NULL;
449 reader->buffer_pos = 0;
450 case_nullify (&reader->c);
452 if (reader->cf->storage == DISK)
453 reader_open_file (reader);
458 /* Creates and returns a destructive casereader for CF. Like a
459 normal casereader, a destructive casereader sequentially reads
460 the cases in a casefile. Unlike a normal casereader, a
461 destructive reader cannot operate concurrently with any other
462 reader. (This restriction could be relaxed in a few ways, but
463 it is so far unnecessary for other code.) */
465 casefile_get_destructive_reader (struct casefile *cf)
467 struct casereader *reader;
469 assert (cf->readers == NULL);
470 reader = casefile_get_reader (cf);
471 reader->destructive = 1;
472 cf->being_destroyed = 1;
476 /* Opens a disk file for READER and seeks to the current position as indicated
477 by case_idx. Normally the current position is the beginning of the file,
478 but casefile_to_disk may cause the file to be opened at a different
481 reader_open_file (struct casereader *reader)
483 struct casefile *cf = reader->cf;
486 if (reader->case_idx >= cf->case_cnt)
496 reader->fd = safe_open (cf->filename, O_RDONLY);
498 msg (FE, _("%s: Opening temporary file: %s."),
499 cf->filename, strerror (errno));
502 if (cf->buffer != NULL)
504 reader->buffer = cf->buffer;
509 reader->buffer = xmalloc (cf->buffer_size * sizeof *cf->buffer);
510 memset (reader->buffer, 0, cf->buffer_size * sizeof *cf->buffer);
513 if (cf->value_cnt != 0)
515 size_t buffer_case_cnt = cf->buffer_size / cf->value_cnt;
516 file_ofs = ((off_t) reader->case_idx / buffer_case_cnt
517 * cf->buffer_size * sizeof *cf->buffer);
518 reader->buffer_pos = (reader->case_idx % buffer_case_cnt
523 if (lseek (reader->fd, file_ofs, SEEK_SET) != file_ofs)
524 msg (FE, _("%s: Seeking temporary file: %s."),
525 cf->filename, strerror (errno));
527 if (cf->case_cnt > 0 && cf->value_cnt > 0)
528 fill_buffer (reader);
530 case_create (&reader->c, cf->value_cnt);
533 /* Fills READER's buffer by reading a block from disk. */
535 fill_buffer (struct casereader *reader)
537 int retval = full_read (reader->fd, reader->buffer,
538 reader->cf->buffer_size * sizeof *reader->buffer);
540 msg (FE, _("%s: Reading temporary file: %s."),
541 reader->cf->filename, strerror (errno));
542 else if (retval != reader->cf->buffer_size * sizeof *reader->buffer)
543 msg (FE, _("%s: Temporary file ended unexpectedly."),
544 reader->cf->filename);
547 /* Returns the casefile that READER reads. */
548 const struct casefile *
549 casereader_get_casefile (const struct casereader *reader)
551 assert (reader != NULL);
556 /* Reads a copy of the next case from READER into C.
557 Caller is responsible for destroying C. */
559 casereader_read (struct casereader *reader, struct ccase *c)
561 assert (reader != NULL);
563 if (reader->case_idx >= reader->cf->case_cnt)
566 if (reader->cf->storage == MEMORY)
568 size_t block_idx = reader->case_idx / CASES_PER_BLOCK;
569 size_t case_idx = reader->case_idx % CASES_PER_BLOCK;
571 case_clone (c, &reader->cf->cases[block_idx][case_idx]);
577 if (reader->buffer_pos + reader->cf->value_cnt > reader->cf->buffer_size)
579 fill_buffer (reader);
580 reader->buffer_pos = 0;
583 case_from_values (&reader->c, reader->buffer + reader->buffer_pos,
584 reader->cf->value_cnt);
585 reader->buffer_pos += reader->cf->value_cnt;
588 case_clone (c, &reader->c);
593 /* Reads the next case from READER into C and transfers ownership
594 to the caller. Caller is responsible for destroying C. */
596 casereader_read_xfer (struct casereader *reader, struct ccase *c)
598 assert (reader != NULL);
600 if (reader->destructive == 0
601 || reader->case_idx >= reader->cf->case_cnt
602 || reader->cf->storage == DISK)
603 return casereader_read (reader, c);
606 size_t block_idx = reader->case_idx / CASES_PER_BLOCK;
607 size_t case_idx = reader->case_idx % CASES_PER_BLOCK;
608 struct ccase *read_case = &reader->cf->cases[block_idx][case_idx];
610 case_move (c, read_case);
616 /* Destroys READER. */
618 casereader_destroy (struct casereader *reader)
620 assert (reader != NULL);
622 if (reader->next != NULL)
623 reader->next->prev = reader->prev;
624 if (reader->prev != NULL)
625 reader->prev->next = reader->next;
626 if (reader->cf->readers == reader)
627 reader->cf->readers = reader->next;
629 if (reader->cf->buffer == NULL)
630 reader->cf->buffer = reader->buffer;
632 free (reader->buffer);
634 if (reader->fd != -1)
636 if (reader->cf->fd == -1)
637 reader->cf->fd = reader->fd;
639 safe_close (reader->fd);
642 case_destroy (&reader->c);
647 /* Calls open(), passing FILENAME and FLAGS, repeating as necessary
648 to deal with interrupted calls. */
650 safe_open (const char *filename, int flags)
656 fd = open (filename, flags);
658 while (fd == -1 && errno == EINTR);
663 /* Calls close(), passing FD, repeating as necessary to deal with
664 interrupted calls. */
665 static int safe_close (int fd)
673 while (retval == -1 && errno == EINTR);
678 /* Calls read(), passing FD, BUFFER, and SIZE, repeating as
679 necessary to deal with interrupted calls. */
681 full_read (int fd, void *buffer_, size_t size)
683 char *buffer = buffer_;
684 size_t bytes_read = 0;
686 while (bytes_read < size)
688 int retval = read (fd, buffer + bytes_read, size - bytes_read);
690 bytes_read += retval;
691 else if (retval == 0)
693 else if (errno != EINTR)
700 /* Calls write(), passing FD, BUFFER, and SIZE, repeating as
701 necessary to deal with interrupted calls. */
703 full_write (int fd, const void *buffer_, size_t size)
705 const char *buffer = buffer_;
706 size_t bytes_written = 0;
708 while (bytes_written < size)
710 int retval = write (fd, buffer + bytes_written, size - bytes_written);
712 bytes_written += retval;
713 else if (errno != EINTR)
717 return bytes_written;
720 /* Registers our exit handler with atexit() if it has not already
723 register_atexit (void)
725 static int registered = 0;
729 atexit (exit_handler);
733 /* atexit() handler that closes and deletes our temporary
738 while (casefiles != NULL)
739 casefile_destroy (casefiles);
747 static void test_casefile (int pattern, size_t value_cnt, size_t case_cnt);
748 static void get_random_case (struct ccase *, size_t value_cnt,
750 static void write_random_case (struct casefile *cf, size_t case_idx);
751 static void read_and_verify_random_case (struct casefile *cf,
752 struct casereader *reader,
754 static void fail_test (const char *message, ...);
757 cmd_debug_casefile (void)
759 static const size_t sizes[] =
761 1, 2, 3, 4, 5, 6, 7, 14, 15, 16, 17, 31, 55, 73,
762 100, 137, 257, 521, 1031, 2053
768 size_max = sizeof sizes / sizeof *sizes;
769 if (lex_match_id ("SMALL"))
777 return lex_end_of_command ();
779 for (pattern = 0; pattern < 5; pattern++)
783 for (size = sizes; size < sizes + size_max; size++)
787 for (case_cnt = 0; case_cnt <= case_max;
788 case_cnt = (case_cnt * 2) + 1)
789 test_casefile (pattern, *size, case_cnt);
792 printf ("Casefile tests succeeded.\n");
797 test_casefile (int pattern, size_t value_cnt, size_t case_cnt)
801 struct casereader *r1, *r2;
807 rng_seed (rng, &zero, sizeof zero);
808 cf = casefile_create (value_cnt);
809 for (i = 0; i < case_cnt; i++)
810 write_random_case (cf, i);
811 r1 = casefile_get_reader (cf);
812 r2 = casefile_get_reader (cf);
816 for (i = 0; i < case_cnt; i++)
818 read_and_verify_random_case (cf, r1, i);
819 read_and_verify_random_case (cf, r2, i);
823 for (i = 0; i < case_cnt; i++)
824 read_and_verify_random_case (cf, r1, i);
825 for (i = 0; i < case_cnt; i++)
826 read_and_verify_random_case (cf, r2, i);
831 for (i = j = 0; i < case_cnt; i++)
833 read_and_verify_random_case (cf, r1, i);
834 if (rng_get_int (rng) % pattern == 0)
835 read_and_verify_random_case (cf, r2, j++);
836 if (i == case_cnt / 2)
837 casefile_to_disk (cf);
839 for (; j < case_cnt; j++)
840 read_and_verify_random_case (cf, r2, j);
843 if (casereader_read (r1, &c))
844 fail_test ("Casereader 1 not at end of file.");
845 if (casereader_read (r2, &c))
846 fail_test ("Casereader 2 not at end of file.");
848 casereader_destroy (r1);
850 casereader_destroy (r2);
853 r1 = casefile_get_destructive_reader (cf);
854 for (i = 0; i < case_cnt; i++)
856 struct ccase read_case, expected_case;
858 get_random_case (&expected_case, value_cnt, i);
859 if (!casereader_read_xfer (r1, &read_case))
860 fail_test ("Premature end of casefile.");
861 for (j = 0; j < value_cnt; j++)
863 double a = case_num (&read_case, j);
864 double b = case_num (&expected_case, j);
866 fail_test ("Case %lu fails comparison.", (unsigned long) i);
868 case_destroy (&expected_case);
869 case_destroy (&read_case);
871 casereader_destroy (r1);
873 casefile_destroy (cf);
878 get_random_case (struct ccase *c, size_t value_cnt, size_t case_idx)
881 case_create (c, value_cnt);
882 for (i = 0; i < value_cnt; i++)
883 case_data_rw (c, i)->f = case_idx % 257 + i;
887 write_random_case (struct casefile *cf, size_t case_idx)
890 get_random_case (&c, casefile_get_value_cnt (cf), case_idx);
891 casefile_append_xfer (cf, &c);
895 read_and_verify_random_case (struct casefile *cf,
896 struct casereader *reader, size_t case_idx)
898 struct ccase read_case, expected_case;
902 value_cnt = casefile_get_value_cnt (cf);
903 get_random_case (&expected_case, value_cnt, case_idx);
904 if (!casereader_read (reader, &read_case))
905 fail_test ("Premature end of casefile.");
906 for (i = 0; i < value_cnt; i++)
908 double a = case_num (&read_case, i);
909 double b = case_num (&expected_case, i);
911 fail_test ("Case %lu fails comparison.", (unsigned long) case_idx);
913 case_destroy (&read_case);
914 case_destroy (&expected_case);
918 fail_test (const char *message, ...)
922 va_start (args, message);
923 vprintf (message, args);