1 /* PSPP - computes sample statistics.
2 Copyright (C) 2004 Free Software Foundation, Inc.
3 Written by Ben Pfaff <blp@gnu.org>.
5 This program is free software; you can redistribute it and/or
6 modify it under the terms of the GNU General Public License as
7 published by the Free Software Foundation; either version 2 of the
8 License, or (at your option) any later version.
10 This program is distributed in the hope that it will be useful, but
11 WITHOUT ANY WARRANTY; without even the implied warranty of
12 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
13 General Public License for more details.
15 You should have received a copy of the GNU General Public License
16 along with this program; if not, write to the Free Software
17 Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA
33 #include "workspace.h"
35 #ifdef HAVE_VALGRIND_VALGRIND_H
36 #include <valgrind/valgrind.h>
39 #define IO_BUF_SIZE 8192
41 /* A casefile is a sequentially accessible array of immutable
42 cases. It may be stored in memory or on disk as workspace
43 allows. Cases may be appended to the end of the file. Cases
44 may be read sequentially starting from the beginning of the
45 file. Once any cases have been read, no more cases may be
46 appended. The entire file is discarded at once. */
52 struct casefile *next, *prev; /* Next, prev in global list. */
53 size_t case_size; /* Case size in bytes. */
54 size_t case_list_size; /* Bytes to allocate for case_lists. */
55 unsigned long case_cnt; /* Number of cases stored. */
56 enum { MEMORY, DISK } storage; /* Where cases are stored. */
57 enum { WRITE, READ } mode; /* Is writing or reading allowed? */
58 struct casereader *readers; /* List of our readers. */
61 struct case_list *head, *tail; /* Beginning, end of list of cases. */
64 int fd; /* File descriptor, -1 if none. */
65 char *filename; /* Filename. */
66 unsigned char *buffer; /* I/O buffer, NULL if none. */
67 size_t buffer_used; /* Number of bytes used in buffer. */
68 size_t buffer_size; /* Buffer size in bytes. */
71 /* For reading out the casing in a casefile. */
74 struct casereader *next, *prev; /* Next, prev in casefile's list. */
75 struct casefile *cf; /* Our casefile. */
76 unsigned long case_idx; /* Case number of current case. */
79 struct case_list *cur; /* Current case. */
82 int fd; /* File descriptor. */
83 unsigned char *buffer; /* I/O buffer. */
84 size_t buffer_pos; /* Byte offset of buffer position. */
87 struct casefile *casefiles;
89 static void register_atexit (void);
90 static void exit_handler (void);
92 static void reader_open_file (struct casereader *reader);
93 static void write_case_to_disk (struct casefile *cf, const struct ccase *c);
94 static void flush_buffer (struct casefile *cf);
95 static void fill_buffer (struct casereader *reader);
97 static int safe_open (const char *filename, int flags);
98 static int safe_close (int fd);
99 static int full_read (int fd, char *buffer, size_t size);
100 static int full_write (int fd, const char *buffer, size_t size);
102 /* Creates and returns a casefile to store cases of CASE_SIZE bytes each. */
104 casefile_create (size_t case_size)
106 struct casefile *cf = xmalloc (sizeof *cf);
107 cf->next = casefiles;
109 if (cf->next != NULL)
112 cf->case_size = case_size;
113 cf->case_list_size = sizeof *cf->head + case_size - sizeof *cf->head->c.data;
115 cf->storage = MEMORY;
118 cf->head = cf->tail = NULL;
122 cf->buffer_size = ROUND_UP (case_size, IO_BUF_SIZE);
123 if (case_size > 0 && cf->buffer_size % case_size > 512)
124 cf->buffer_size = case_size;
130 /* Destroys casefile CF. */
132 casefile_destroy (struct casefile *cf)
136 struct case_list *iter, *next;
138 if (cf->next != NULL)
139 cf->next->prev = cf->prev;
140 if (cf->prev != NULL)
141 cf->prev->next = cf->next;
143 casefiles = cf->next;
145 while (cf->readers != NULL)
146 casereader_destroy (cf->readers);
148 for (iter = cf->head; iter != NULL; iter = next)
151 workspace_free (iter, cf->case_list_size);
157 if (cf->filename != NULL && remove (cf->filename) == -1)
158 msg (ME, _("%s: Removing temporary file: %s."),
159 cf->filename, strerror (errno));
168 /* Returns nonzero only if casefile CF is stored in memory (instead of on
171 casefile_in_core (const struct casefile *cf)
175 return cf->storage == MEMORY;
178 /* Returns the number of bytes in a case for CF. */
180 casefile_get_case_size (const struct casefile *cf)
184 return cf->case_size;
187 /* Returns the number of cases in casefile CF. */
189 casefile_get_case_cnt (const struct casefile *cf)
196 /* Appends case C to casefile CF. Not valid after any reader for CF has been
199 casefile_append (struct casefile *cf, const struct ccase *c)
203 assert (cf->mode == WRITE);
207 /* Try memory first. */
208 if (cf->storage == MEMORY)
210 struct case_list *new_case;
212 new_case = workspace_malloc (cf->case_list_size);
213 if (new_case != NULL)
215 memcpy (&new_case->c, c, cf->case_size);
216 new_case->next = NULL;
217 if (cf->head != NULL)
218 cf->tail->next = new_case;
225 casefile_to_disk (cf);
226 assert (cf->storage == DISK);
227 write_case_to_disk (cf, c);
231 write_case_to_disk (cf, c);
234 /* Writes case C to casefile CF's disk buffer, first flushing the buffer to
235 disk if it would otherwise overflow. */
237 write_case_to_disk (struct casefile *cf, const struct ccase *c)
239 memcpy (cf->buffer + cf->buffer_used, c->data, cf->case_size);
240 cf->buffer_used += cf->case_size;
241 if (cf->buffer_used + cf->case_size > cf->buffer_size)
247 flush_buffer (struct casefile *cf)
249 if (cf->buffer_used > 0)
251 if (!full_write (cf->fd, cf->buffer, cf->buffer_size))
252 msg (FE, _("Error writing temporary file: %s."), strerror (errno));
258 /* Creates a temporary file and stores its name in *FILENAME and
259 a file descriptor for it in *FD. Returns success. Caller is
260 responsible for freeing *FILENAME. */
262 make_temp_file (int *fd, char **filename)
264 const char *parent_dir;
266 assert (filename != NULL);
269 if (getenv ("TMPDIR") != NULL)
270 parent_dir = getenv ("TMPDIR");
272 parent_dir = P_tmpdir;
274 *filename = xmalloc (strlen (parent_dir) + 32);
275 sprintf (*filename, "%s%cpsppXXXXXX", parent_dir, DIR_SEPARATOR);
276 *fd = mkstemp (*filename);
279 msg (FE, _("%s: Creating temporary file: %s."),
280 *filename, strerror (errno));
289 call_posix_fadvise (int fd UNUSED,
290 off_t offset UNUSED, off_t len UNUSED,
293 #ifdef HAVE_VALGRIND_VALGRIND_H
294 /* Valgrind doesn't know about posix_fadvise() as of this
296 if (RUNNING_ON_VALGRIND)
300 #ifdef HAVE_POSIX_FADVISE
301 posix_fadvise (fd, offset, len, advice);
305 /* If CF is currently stored in memory, writes it to disk. Readers, if any,
306 retain their current positions. */
308 casefile_to_disk (struct casefile *cf)
310 struct case_list *iter, *next;
311 struct casereader *reader;
315 if (cf->storage == MEMORY)
317 assert (cf->filename == NULL);
318 assert (cf->fd == -1);
319 assert (cf->buffer_used == 0);
322 if (!make_temp_file (&cf->fd, &cf->filename))
324 call_posix_fadvise (cf->fd, 0, 0, POSIX_FADV_SEQUENTIAL);
325 cf->buffer = xmalloc (cf->buffer_size);
326 memset (cf->buffer, 0, cf->buffer_size);
328 for (iter = cf->head; iter != NULL; iter = next)
331 write_case_to_disk (cf, &iter->c);
332 workspace_free (iter, cf->case_list_size);
335 cf->head = cf->tail = NULL;
337 for (reader = cf->readers; reader != NULL; reader = reader->next)
338 reader_open_file (reader);
342 /* Merges lists A and B into a single list, which is returned. Cases are
343 compared according to comparison function COMPARE, which receives auxiliary
345 static struct case_list *
346 merge (struct case_list *a, struct case_list *b,
347 int (*compare) (const struct ccase *,
348 const struct ccase *, void *aux),
351 struct case_list head;
352 struct case_list *tail = &head;
354 while (a != NULL && b != NULL)
355 if (compare (&a->c, &b->c, aux) < 0)
368 tail->next = a == NULL ? b : a;
373 /* Sorts the list beginning at FIRST, returning the new first case. Cases are
374 compared according to comparison function COMPARE, which receives auxiliary
376 static struct case_list *
377 merge_sort (struct case_list *first,
378 int (*compare) (const struct ccase *,
379 const struct ccase *, void *aux),
382 /* FIXME: we should use a "natural" merge sort to take
383 advantage of the natural order of the data. */
384 struct case_list *middle, *last, *tmp;
386 /* A list of zero or one elements is already sorted. */
387 if (first == NULL || first->next == NULL)
392 while (last != NULL && last->next != NULL)
394 middle = middle->next;
395 last = last->next->next;
398 middle = middle->next;
400 return merge (merge_sort (first, compare, aux),
401 merge_sort (middle, compare, aux),
405 /* Tries to sort casefile CF according to comparison function
406 COMPARE, which is passes auxiliary data AUX. If successful,
407 returns nonzero. Currently only sorting of in-memory
408 casefiles is implemented. */
410 casefile_sort (struct casefile *cf,
411 int (*compare) (const struct ccase *,
412 const struct ccase *, void *aux),
416 assert (compare != NULL);
420 if (cf->case_cnt < 2)
422 else if (cf->storage == DISK)
426 cf->head = cf->tail = merge_sort (cf->head, compare, aux);
427 while (cf->tail->next != NULL)
428 cf->tail = cf->tail->next;
434 /* Creates and returns a casereader for CF. A casereader can be used to
435 sequentially read the cases in a casefile. */
437 casefile_get_reader (const struct casefile *cf_)
439 struct casefile *cf = (struct casefile *) cf_;
440 struct casereader *reader;
442 /* Flush the buffer to disk if it's not empty. */
443 if (cf->mode == WRITE && cf->storage == DISK)
448 reader = xmalloc (sizeof *reader);
450 reader->next = cf->readers;
451 if (cf->readers != NULL)
452 reader->next->prev = reader;
454 cf->readers = reader;
455 reader->case_idx = 0;
458 reader->buffer = NULL;
459 reader->buffer_pos = 0;
461 if (reader->cf->storage == MEMORY)
462 reader->cur = cf->head;
464 reader_open_file (reader);
469 /* Opens a disk file for READER and seeks to the current position as indicated
470 by case_idx. Normally the current position is the beginning of the file,
471 but casefile_to_disk may cause the file to be opened at a different
474 reader_open_file (struct casereader *reader)
476 size_t buffer_case_cnt;
479 if (reader->case_idx >= reader->cf->case_cnt)
482 if (reader->cf->fd != -1)
484 reader->fd = reader->cf->fd;
489 reader->fd = safe_open (reader->cf->filename, O_RDONLY);
491 msg (FE, _("%s: Opening temporary file: %s."),
492 reader->cf->filename, strerror (errno));
495 if (reader->cf->buffer != NULL)
497 reader->buffer = reader->cf->buffer;
498 reader->cf->buffer = NULL;
502 reader->buffer = xmalloc (reader->cf->buffer_size);
503 memset (reader->buffer, 0, reader->cf->buffer_size);
506 if (reader->cf->case_size != 0)
508 buffer_case_cnt = reader->cf->buffer_size / reader->cf->case_size;
509 file_ofs = ((off_t) reader->case_idx
510 / buffer_case_cnt * reader->cf->buffer_size);
511 reader->buffer_pos = (reader->case_idx % buffer_case_cnt
512 * reader->cf->case_size);
516 call_posix_fadvise (reader->fd, file_ofs, 0, POSIX_FADV_SEQUENTIAL);
517 if (lseek (reader->fd, file_ofs, SEEK_SET) != file_ofs)
518 msg (FE, _("%s: Seeking temporary file: %s."),
519 reader->cf->filename, strerror (errno));
521 if (reader->cf->case_cnt > 0 && reader->cf->case_size > 0)
522 fill_buffer (reader);
525 /* Fills READER's buffer by reading a block from disk. */
527 fill_buffer (struct casereader *reader)
529 int retval = full_read (reader->fd, reader->buffer, reader->cf->buffer_size);
531 msg (FE, _("%s: Reading temporary file: %s."),
532 reader->cf->filename, strerror (errno));
533 else if (retval != reader->cf->buffer_size)
534 msg (FE, _("%s: Temporary file ended unexpectedly."),
535 reader->cf->filename);
539 casereader_read (struct casereader *reader, const struct ccase **c)
541 assert (reader != NULL);
543 if (reader->case_idx >= reader->cf->case_cnt)
550 if (reader->cf->storage == MEMORY)
552 assert (reader->cur != NULL);
553 *c = &reader->cur->c;
554 reader->cur = reader->cur->next;
559 if (reader->buffer_pos + reader->cf->case_size > reader->cf->buffer_size)
561 fill_buffer (reader);
562 reader->buffer_pos = 0;
565 *c = (struct ccase *) (reader->buffer + reader->buffer_pos);
566 reader->buffer_pos += reader->cf->case_size;
572 casereader_destroy (struct casereader *reader)
574 assert (reader != NULL);
576 if (reader->next != NULL)
577 reader->next->prev = reader->prev;
578 if (reader->prev != NULL)
579 reader->prev->next = reader->next;
580 if (reader->cf->readers == reader)
581 reader->cf->readers = reader->next;
583 if (reader->cf->buffer == NULL)
584 reader->cf->buffer = reader->buffer;
586 free (reader->buffer);
588 if (reader->fd != -1)
590 if (reader->cf->fd == -1)
591 reader->cf->fd = reader->fd;
593 safe_close (reader->fd);
600 safe_open (const char *filename, int flags)
606 fd = open (filename, flags);
608 while (fd == -1 && errno == EINTR);
613 static int safe_close (int fd)
621 while (retval == -1 && errno == EINTR);
627 full_read (int fd, char *buffer, size_t size)
629 size_t bytes_read = 0;
631 while (bytes_read < size)
633 int retval = read (fd, buffer + bytes_read, size - bytes_read);
635 bytes_read += retval;
636 else if (retval == 0)
638 else if (errno != EINTR)
646 full_write (int fd, const char *buffer, size_t size)
648 size_t bytes_written = 0;
650 while (bytes_written < size)
652 int retval = write (fd, buffer + bytes_written, size - bytes_written);
654 bytes_written += retval;
655 else if (errno != EINTR)
659 return bytes_written;
663 register_atexit (void)
665 static int registered = 0;
669 atexit (exit_handler);
676 while (casefiles != NULL)
677 casefile_destroy (casefiles);
685 static void test_casefile (int pattern, size_t case_size, size_t case_cnt);
686 static struct ccase *get_random_case (size_t case_size, size_t case_idx);
687 static void write_random_case (struct casefile *cf, size_t case_idx);
688 static void read_and_verify_random_case (struct casefile *cf,
689 struct casereader *reader,
691 static void fail_test (const char *message, ...);
694 cmd_debug_casefile (void)
696 static const size_t sizes[] =
698 1, 2, 3, 4, 5, 6, 7, 14, 15, 16, 17, 31, 55, 73,
699 100, 137, 257, 521, 1031, 2053
705 size_max = sizeof sizes / sizeof *sizes;
706 if (lex_match_id ("SMALL"))
714 return lex_end_of_command ();
716 for (pattern = 0; pattern < 5; pattern++)
720 for (size = sizes; size < sizes + size_max; size++)
724 for (case_cnt = 0; case_cnt <= case_max;
725 case_cnt = (case_cnt * 2) + 1)
726 test_casefile (pattern, *size * sizeof (union value), case_cnt);
729 printf ("Casefile tests succeeded.\n");
734 test_casefile (int pattern, size_t case_size, size_t case_cnt)
738 struct casereader *r1, *r2;
739 const struct ccase *c;
744 rng_seed (rng, &zero, sizeof zero);
745 cf = casefile_create (case_size);
746 for (i = 0; i < case_cnt; i++)
747 write_random_case (cf, i);
748 r1 = casefile_get_reader (cf);
749 r2 = casefile_get_reader (cf);
753 for (i = 0; i < case_cnt; i++)
755 read_and_verify_random_case (cf, r1, i);
756 read_and_verify_random_case (cf, r2, i);
760 for (i = 0; i < case_cnt; i++)
761 read_and_verify_random_case (cf, r1, i);
762 for (i = 0; i < case_cnt; i++)
763 read_and_verify_random_case (cf, r2, i);
768 for (i = j = 0; i < case_cnt; i++)
770 read_and_verify_random_case (cf, r1, i);
771 if (rng_get_int (rng) % pattern == 0)
772 read_and_verify_random_case (cf, r2, j++);
773 if (i == case_cnt / 2)
774 casefile_to_disk (cf);
776 for (; j < case_cnt; j++)
777 read_and_verify_random_case (cf, r2, j);
780 if (casereader_read (r1, &c))
781 fail_test ("Casereader 1 not at end of file.");
782 if (casereader_read (r2, &c))
783 fail_test ("Casereader 2 not at end of file.");
785 casereader_destroy (r1);
787 casereader_destroy (r2);
788 casefile_destroy (cf);
792 static struct ccase *
793 get_random_case (size_t case_size, size_t case_idx)
795 struct ccase *c = xmalloc (case_size);
796 memset (c, case_idx % 257, case_size);
801 write_random_case (struct casefile *cf, size_t case_idx)
803 struct ccase *c = get_random_case (casefile_get_case_size (cf), case_idx);
804 casefile_append (cf, c);
809 read_and_verify_random_case (struct casefile *cf,
810 struct casereader *reader, size_t case_idx)
812 const struct ccase *read_case;
813 struct ccase *expected_case;
816 case_size = casefile_get_case_size (cf);
817 expected_case = get_random_case (case_size, case_idx);
818 if (!casereader_read (reader, &read_case))
819 fail_test ("Premature end of casefile.");
820 if (memcmp (read_case, expected_case, case_size))
821 fail_test ("Case %lu fails comparison.", (unsigned long) case_idx);
822 free (expected_case);
826 fail_test (const char *message, ...)
830 va_start (args, message);
831 vprintf (message, args);