1 /* PSPP - computes sample statistics.
2 Copyright (C) 2004 Free Software Foundation, Inc.
3 Written by Ben Pfaff <blp@gnu.org>.
5 This program is free software; you can redistribute it and/or
6 modify it under the terms of the GNU General Public License as
7 published by the Free Software Foundation; either version 2 of the
8 License, or (at your option) any later version.
10 This program is distributed in the hope that it will be useful, but
11 WITHOUT ANY WARRANTY; without even the implied warranty of
12 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
13 General Public License for more details.
15 You should have received a copy of the GNU General Public License
16 along with this program; if not, write to the Free Software
17 Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA
33 #include "workspace.h"
35 #define IO_BUF_SIZE 8192
37 /* A casefile is a sequentially accessible array of immutable
38 cases. It may be stored in memory or on disk as workspace
39 allows. Cases may be appended to the end of the file. Cases
40 may be read sequentially starting from the beginning of the
41 file. Once any cases have been read, no more cases may be
42 appended. The entire file is discarded at once. */
48 struct casefile *next, *prev; /* Next, prev in global list. */
49 size_t case_size; /* Case size in bytes. */
50 size_t case_list_size; /* Bytes to allocate for case_lists. */
51 unsigned long case_cnt; /* Number of cases stored. */
52 enum { MEMORY, DISK } storage; /* Where cases are stored. */
53 enum { WRITE, READ } mode; /* Is writing or reading allowed? */
54 struct casereader *readers; /* List of our readers. */
57 struct case_list *head, *tail; /* Beginning, end of list of cases. */
60 int fd; /* File descriptor, -1 if none. */
61 char *filename; /* Filename. */
62 unsigned char *buffer; /* I/O buffer, NULL if none. */
63 size_t buffer_used; /* Number of bytes used in buffer. */
64 size_t buffer_size; /* Buffer size in bytes. */
67 /* For reading out the casing in a casefile. */
70 struct casereader *next, *prev; /* Next, prev in casefile's list. */
71 struct casefile *cf; /* Our casefile. */
72 unsigned long case_idx; /* Case number of current case. */
75 struct case_list *cur; /* Current case. */
78 int fd; /* File descriptor. */
79 unsigned char *buffer; /* I/O buffer. */
80 size_t buffer_pos; /* Byte offset of buffer position. */
83 struct casefile *casefiles;
85 static void register_atexit (void);
86 static void exit_handler (void);
88 static void reader_open_file (struct casereader *reader);
89 static void write_case_to_disk (struct casefile *cf, const struct ccase *c);
90 static void flush_buffer (struct casefile *cf);
91 static void fill_buffer (struct casereader *reader);
93 static int safe_open (const char *filename, int flags);
94 static int safe_close (int fd);
95 static int full_read (int fd, char *buffer, size_t size);
96 static int full_write (int fd, const char *buffer, size_t size);
98 /* Creates and returns a casefile to store cases of CASE_SIZE bytes each. */
100 casefile_create (size_t case_size)
102 struct casefile *cf = xmalloc (sizeof *cf);
103 cf->next = casefiles;
105 if (cf->next != NULL)
108 cf->case_size = case_size;
109 cf->case_list_size = sizeof *cf->head + case_size - sizeof *cf->head->c.data;
111 cf->storage = MEMORY;
114 cf->head = cf->tail = NULL;
118 cf->buffer_size = ROUND_UP (case_size, IO_BUF_SIZE);
119 if (case_size > 0 && cf->buffer_size % case_size > 512)
120 cf->buffer_size = case_size;
126 /* Destroys casefile CF. */
128 casefile_destroy (struct casefile *cf)
132 struct case_list *iter, *next;
134 if (cf->next != NULL)
135 cf->next->prev = cf->prev;
136 if (cf->prev != NULL)
137 cf->prev->next = cf->next;
139 casefiles = cf->next;
141 while (cf->readers != NULL)
142 casereader_destroy (cf->readers);
144 for (iter = cf->head; iter != NULL; iter = next)
147 workspace_free (iter, cf->case_list_size);
153 if (cf->filename != NULL && remove (cf->filename) == -1)
154 msg (ME, _("%s: Removing temporary file: %s."),
155 cf->filename, strerror (errno));
163 /* Returns nonzero only if casefile CF is stored in memory (instead of on
166 casefile_in_core (const struct casefile *cf)
170 return cf->storage == MEMORY;
173 /* Returns the number of bytes in a case for CF. */
175 casefile_get_case_size (const struct casefile *cf)
179 return cf->case_size;
182 /* Returns the number of cases in casefile CF. */
184 casefile_get_case_cnt (const struct casefile *cf)
191 /* Appends case C to casefile CF. Not valid after any reader for CF has been
194 casefile_append (struct casefile *cf, const struct ccase *c)
198 assert (cf->mode == WRITE);
202 /* Try memory first. */
203 if (cf->storage == MEMORY)
205 struct case_list *new_case;
207 new_case = workspace_malloc (cf->case_list_size);
208 if (new_case != NULL)
210 memcpy (&new_case->c, c, cf->case_size);
211 new_case->next = NULL;
212 if (cf->head != NULL)
213 cf->tail->next = new_case;
220 casefile_to_disk (cf);
221 assert (cf->storage == DISK);
222 write_case_to_disk (cf, c);
226 write_case_to_disk (cf, c);
229 /* Writes case C to casefile CF's disk buffer, first flushing the buffer to
230 disk if it would otherwise overflow. */
232 write_case_to_disk (struct casefile *cf, const struct ccase *c)
234 memcpy (cf->buffer + cf->buffer_used, c->data, cf->case_size);
235 cf->buffer_used += cf->case_size;
236 if (cf->buffer_used + cf->case_size > cf->buffer_size)
242 flush_buffer (struct casefile *cf)
244 if (cf->buffer_used > 0)
246 if (!full_write (cf->fd, cf->buffer, cf->buffer_size))
247 msg (FE, _("Error writing temporary file: %s."), strerror (errno));
253 /* Creates a temporary file and stores its name in *FILENAME and
254 a file descriptor for it in *FD. Returns success. Caller is
255 responsible for freeing *FILENAME. */
257 make_temp_file (int *fd, char **filename)
259 const char *parent_dir;
261 assert (filename != NULL);
264 if (getenv ("TMPDIR") != NULL)
265 parent_dir = getenv ("TMPDIR");
267 parent_dir = P_tmpdir;
269 *filename = xmalloc (strlen (parent_dir) + 32);
270 sprintf (*filename, "%s%cpsppXXXXXX", parent_dir, DIR_SEPARATOR);
271 *fd = mkstemp (*filename);
274 msg (FE, _("%s: Creating temporary file: %s."),
275 *filename, strerror (errno));
283 /* If CF is currently stored in memory, writes it to disk. Readers, if any,
284 retain their current positions. */
286 casefile_to_disk (struct casefile *cf)
288 struct case_list *iter, *next;
289 struct casereader *reader;
293 if (cf->storage == MEMORY)
295 assert (cf->filename == NULL);
296 assert (cf->fd == -1);
297 assert (cf->buffer_used == 0);
300 if (!make_temp_file (&cf->fd, &cf->filename))
302 #if HAVE_POSIX_FADVISE
303 posix_fadvise (cf->fd, 0, 0, POSIX_FADV_SEQUENTIAL);
305 cf->buffer = xmalloc (cf->buffer_size);
306 memset (cf->buffer, 0, cf->buffer_size);
308 for (iter = cf->head; iter != NULL; iter = next)
311 write_case_to_disk (cf, &iter->c);
312 workspace_free (iter, cf->case_list_size);
315 cf->head = cf->tail = NULL;
317 for (reader = cf->readers; reader != NULL; reader = reader->next)
318 reader_open_file (reader);
322 /* Merges lists A and B into a single list, which is returned. Cases are
323 compared according to comparison function COMPARE, which receives auxiliary
325 static struct case_list *
326 merge (struct case_list *a, struct case_list *b,
327 int (*compare) (const struct ccase *,
328 const struct ccase *, void *aux),
331 struct case_list head;
332 struct case_list *tail = &head;
334 while (a != NULL && b != NULL)
335 if (compare (&a->c, &b->c, aux) < 0)
348 tail->next = a == NULL ? b : a;
353 /* Sorts the list beginning at FIRST, returning the new first case. Cases are
354 compared according to comparison function COMPARE, which receives auxiliary
356 static struct case_list *
357 merge_sort (struct case_list *first,
358 int (*compare) (const struct ccase *,
359 const struct ccase *, void *aux),
362 /* FIXME: we should use a "natural" merge sort to take
363 advantage of the natural order of the data. */
364 struct case_list *middle, *last, *tmp;
366 /* A list of zero or one elements is already sorted. */
367 if (first == NULL || first->next == NULL)
372 while (last != NULL && last->next != NULL)
374 middle = middle->next;
375 last = last->next->next;
378 middle = middle->next;
380 return merge (merge_sort (first, compare, aux),
381 merge_sort (middle, compare, aux),
385 /* Tries to sort casefile CF according to comparison function
386 COMPARE, which is passes auxiliary data AUX. If successful,
387 returns nonzero. Currently only sorting of in-memory
388 casefiles is implemented. */
390 casefile_sort (struct casefile *cf,
391 int (*compare) (const struct ccase *,
392 const struct ccase *, void *aux),
396 assert (compare != NULL);
400 if (cf->case_cnt < 2)
402 else if (cf->storage == DISK)
406 cf->head = cf->tail = merge_sort (cf->head, compare, aux);
407 while (cf->tail->next != NULL)
408 cf->tail = cf->tail->next;
414 /* Creates and returns a casereader for CF. A casereader can be used to
415 sequentially read the cases in a casefile. */
417 casefile_get_reader (const struct casefile *cf_)
419 struct casefile *cf = (struct casefile *) cf_;
420 struct casereader *reader;
422 /* Flush the buffer to disk if it's not empty. */
423 if (cf->mode == WRITE && cf->storage == DISK)
428 reader = xmalloc (sizeof *reader);
430 reader->next = cf->readers;
431 if (cf->readers != NULL)
432 reader->next->prev = reader;
434 cf->readers = reader;
435 reader->case_idx = 0;
438 reader->buffer = NULL;
439 reader->buffer_pos = 0;
441 if (reader->cf->storage == MEMORY)
442 reader->cur = cf->head;
444 reader_open_file (reader);
449 /* Opens a disk file for READER and seeks to the current position as indicated
450 by case_idx. Normally the current position is the beginning of the file,
451 but casefile_to_disk may cause the file to be opened at a different
454 reader_open_file (struct casereader *reader)
456 size_t buffer_case_cnt;
459 if (reader->case_idx >= reader->cf->case_cnt)
462 if (reader->cf->fd != -1)
464 reader->fd = reader->cf->fd;
469 reader->fd = safe_open (reader->cf->filename, O_RDONLY);
471 msg (FE, _("%s: Opening temporary file: %s."),
472 reader->cf->filename, strerror (errno));
475 if (reader->cf->buffer != NULL)
477 reader->buffer = reader->cf->buffer;
478 reader->cf->buffer = NULL;
482 reader->buffer = xmalloc (reader->cf->buffer_size);
483 memset (reader->buffer, 0, reader->cf->buffer_size);
486 if (reader->cf->case_size != 0)
488 buffer_case_cnt = reader->cf->buffer_size / reader->cf->case_size;
489 file_ofs = ((off_t) reader->case_idx
490 / buffer_case_cnt * reader->cf->buffer_size);
491 reader->buffer_pos = (reader->case_idx % buffer_case_cnt
492 * reader->cf->case_size);
496 #if HAVE_POSIX_FADVISE
497 posix_fadvise (reader->fd, file_ofs, 0, POSIX_FADV_SEQUENTIAL);
499 if (lseek (reader->fd, file_ofs, SEEK_SET) != file_ofs)
500 msg (FE, _("%s: Seeking temporary file: %s."),
501 reader->cf->filename, strerror (errno));
503 if (reader->cf->case_cnt > 0 && reader->cf->case_size > 0)
504 fill_buffer (reader);
507 /* Fills READER's buffer by reading a block from disk. */
509 fill_buffer (struct casereader *reader)
511 int retval = full_read (reader->fd, reader->buffer, reader->cf->buffer_size);
513 msg (FE, _("%s: Reading temporary file: %s."),
514 reader->cf->filename, strerror (errno));
515 else if (retval != reader->cf->buffer_size)
516 msg (FE, _("%s: Temporary file ended unexpectedly."),
517 reader->cf->filename);
521 casereader_read (struct casereader *reader, const struct ccase **c)
523 assert (reader != NULL);
525 if (reader->case_idx >= reader->cf->case_cnt)
532 if (reader->cf->storage == MEMORY)
534 assert (reader->cur != NULL);
535 *c = &reader->cur->c;
536 reader->cur = reader->cur->next;
541 if (reader->buffer_pos + reader->cf->case_size > reader->cf->buffer_size)
543 fill_buffer (reader);
544 reader->buffer_pos = 0;
547 *c = (struct ccase *) (reader->buffer + reader->buffer_pos);
548 reader->buffer_pos += reader->cf->case_size;
554 casereader_destroy (struct casereader *reader)
556 assert (reader != NULL);
558 if (reader->next != NULL)
559 reader->next->prev = reader->prev;
560 if (reader->prev != NULL)
561 reader->prev->next = reader->next;
562 if (reader->cf->readers == reader)
563 reader->cf->readers = reader->next;
565 if (reader->cf->buffer == NULL)
566 reader->cf->buffer = reader->buffer;
568 free (reader->buffer);
570 if (reader->cf->fd == -1)
571 reader->cf->fd = reader->fd;
573 safe_close (reader->fd);
579 safe_open (const char *filename, int flags)
585 fd = open (filename, flags);
587 while (fd == -1 && errno == EINTR);
592 static int safe_close (int fd)
600 while (retval == -1 && errno == EINTR);
606 full_read (int fd, char *buffer, size_t size)
608 size_t bytes_read = 0;
610 while (bytes_read < size)
612 int retval = read (fd, buffer + bytes_read, size - bytes_read);
614 bytes_read += retval;
615 else if (retval == 0)
617 else if (errno != EINTR)
625 full_write (int fd, const char *buffer, size_t size)
627 size_t bytes_written = 0;
629 while (bytes_written < size)
631 int retval = write (fd, buffer + bytes_written, size - bytes_written);
633 bytes_written += retval;
634 else if (errno != EINTR)
638 return bytes_written;
642 register_atexit (void)
644 static int registered = 0;
648 atexit (exit_handler);
655 while (casefiles != NULL)
656 casefile_destroy (casefiles);
664 static void test_casefile (int pattern, size_t case_size, size_t case_cnt);
665 static struct ccase *get_random_case (size_t case_size, size_t case_idx);
666 static void write_random_case (struct casefile *cf, size_t case_idx);
667 static void read_and_verify_random_case (struct casefile *cf,
668 struct casereader *reader,
670 static void fail_test (const char *message, ...);
673 cmd_debug_casefile (void)
675 static const size_t sizes[] =
677 1, 2, 3, 4, 5, 6, 7, 14, 15, 16, 17, 31, 55, 73,
678 100, 137, 257, 521, 1031, 2053
684 size_max = sizeof sizes / sizeof *sizes;
685 if (lex_match_id ("SMALL"))
693 return lex_end_of_command ();
695 for (pattern = 0; pattern < 5; pattern++)
699 for (size = sizes; size < sizes + size_max; size++)
703 for (case_cnt = 0; case_cnt <= case_max;
704 case_cnt = (case_cnt * 2) + 1)
705 test_casefile (pattern, *size * sizeof (union value), case_cnt);
708 printf ("Casefile tests succeeded.\n");
713 test_casefile (int pattern, size_t case_size, size_t case_cnt)
717 struct casereader *r1, *r2;
718 const struct ccase *c;
723 rng_seed (rng, &zero, sizeof zero);
724 cf = casefile_create (case_size);
725 for (i = 0; i < case_cnt; i++)
726 write_random_case (cf, i);
727 r1 = casefile_get_reader (cf);
728 r2 = casefile_get_reader (cf);
732 for (i = 0; i < case_cnt; i++)
734 read_and_verify_random_case (cf, r1, i);
735 read_and_verify_random_case (cf, r2, i);
739 for (i = 0; i < case_cnt; i++)
740 read_and_verify_random_case (cf, r1, i);
741 for (i = 0; i < case_cnt; i++)
742 read_and_verify_random_case (cf, r2, i);
747 for (i = j = 0; i < case_cnt; i++)
749 read_and_verify_random_case (cf, r1, i);
750 if (rng_get_int (rng) % pattern == 0)
751 read_and_verify_random_case (cf, r2, j++);
752 if (i == case_cnt / 2)
753 casefile_to_disk (cf);
755 for (; j < case_cnt; j++)
756 read_and_verify_random_case (cf, r2, j);
759 if (casereader_read (r1, &c))
760 fail_test ("Casereader 1 not at end of file.");
761 if (casereader_read (r2, &c))
762 fail_test ("Casereader 2 not at end of file.");
764 casereader_destroy (r1);
766 casereader_destroy (r2);
767 casefile_destroy (cf);
771 static struct ccase *
772 get_random_case (size_t case_size, size_t case_idx)
774 struct ccase *c = xmalloc (case_size);
775 memset (c, case_idx % 257, case_size);
780 write_random_case (struct casefile *cf, size_t case_idx)
782 struct ccase *c = get_random_case (casefile_get_case_size (cf), case_idx);
783 casefile_append (cf, c);
788 read_and_verify_random_case (struct casefile *cf,
789 struct casereader *reader, size_t case_idx)
791 const struct ccase *read_case;
792 struct ccase *expected_case;
795 case_size = casefile_get_case_size (cf);
796 expected_case = get_random_case (case_size, case_idx);
797 if (!casereader_read (reader, &read_case))
798 fail_test ("Premature end of casefile.");
799 if (memcmp (read_case, expected_case, case_size))
800 fail_test ("Case %lu fails comparison.", (unsigned long) case_idx);
801 free (expected_case);
805 fail_test (const char *message, ...)
809 va_start (args, message);
810 vprintf (message, args);