1 /* PSPP - computes sample statistics.
2 Copyright (C) 2004, 2006 Free Software Foundation, Inc.
3 Written by Ben Pfaff <blp@gnu.org>.
5 This program is free software; you can redistribute it and/or
6 modify it under the terms of the GNU General Public License as
7 published by the Free Software Foundation; either version 2 of the
8 License, or (at your option) any later version.
10 This program is distributed in the hope that it will be useful, but
11 WITHOUT ANY WARRANTY; without even the implied warranty of
12 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
13 General Public License for more details.
15 You should have received a copy of the GNU General Public License
16 along with this program; if not, write to the Free Software
17 Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
22 #include "casefile-private.h"
31 #include <libpspp/alloc.h>
33 #include <libpspp/compiler.h>
34 #include <libpspp/message.h>
35 #include "full-read.h"
36 #include "full-write.h"
37 #include <libpspp/misc.h>
38 #include "make-file.h"
43 #define _(msgid) gettext (msgid)
45 #define IO_BUF_SIZE (8192 / sizeof (union value))
47 /* A fastfile represents a sequentially accessible stream of
50 If workspace allows, a fastfile is maintained in memory. If
51 workspace overflows, then the fastfile is pushed to disk. In
52 either case the interface presented to callers is kept the
55 The life cycle of a fastfile consists of up to three phases:
57 1. Writing. The fastfile initially contains no cases. In
58 this phase, any number of cases may be appended to the
59 end of a fastfile. (Cases are never inserted in the
60 middle or before the beginning of a fastfile.)
62 Use casefile_append or casefile_append_xfer to
63 append a case to a fastfile.
65 2. Reading. The fastfile may be read sequentially,
66 starting from the beginning, by "casereaders". Any
67 number of casereaders may be created, at any time,
68 during the reading phase. Each casereader has an
69 independent position in the fastfile.
71 Ordinary casereaders may only move forward. They
72 cannot move backward to arbitrary records or seek
73 randomly. Cloning casereaders is possible, but it is
76 Use casefile_get_reader to create a casereader for
77 use in phase 2. This also transitions from phase 1 to
78 phase 2. Calling fastfile_mode_reader makes the same
79 transition, without creating a casereader.
81 Use casereader_read or casereader_read_xfer to read
82 a case from a casereader. Use casereader_destroy to
83 discard a casereader when it is no longer needed.
85 3. Destruction. This phase is optional. The fastfile is
86 also read with casereaders in this phase, but the
87 ability to create new casereaders is curtailed.
89 In this phase, casereaders could still be cloned (once
90 we eventually implement cloning).
92 To transition from phase 1 or 2 to phase 3 and create a
93 casereader, call casefile_get_destructive_reader().
94 The same functions apply to the casereader obtained
95 this way as apply to casereaders obtained in phase 2.
97 After casefile_get_destructive_reader is called, no
98 more casereaders may be created. (If cloning of
99 casereaders were implemented, it would still be
102 The purpose of the limitations applied to casereaders
103 in phase 3 is to allow in-memory fastfiles to fully
104 transfer ownership of cases to the casereaders,
105 avoiding the need for extra copies of case data. For
106 relatively static data sets with many variables, I
107 suspect (without evidence) that this may be a big
110 When a fastfile is no longer needed, it may be destroyed with
111 casefile_destroy. This function will also destroy any
112 remaining casereaders. */
114 /* FIXME: should we implement compression? */
116 /* In-memory cases are arranged in an array of arrays. The top
117 level is variable size and the size of each bottom level array
118 is fixed at the number of cases defined here. */
119 #define CASES_PER_BLOCK 128
121 static const struct class_casefile class;
126 struct casefile cf; /* Parent */
128 size_t value_cnt; /* Case size in `union value's. */
129 size_t case_acct_size; /* Case size for accounting. */
130 unsigned long case_cnt; /* Number of cases stored. */
131 enum { MEMORY, DISK } storage; /* Where cases are stored. */
132 enum { WRITE, READ } mode; /* Is writing or reading allowed? */
134 bool ok; /* False after I/O error. */
136 /* Memory storage. */
137 struct ccase **cases; /* Pointer to array of cases. */
140 int fd; /* File descriptor, -1 if none. */
141 char *file_name; /* File name. */
142 union value *buffer; /* I/O buffer, NULL if none. */
143 size_t buffer_used; /* Number of values used in buffer. */
144 size_t buffer_size; /* Buffer size in values. */
148 static const struct class_casereader class_reader;
150 /* For reading out the cases in a fastfile. */
151 struct fastfilereader
153 struct casereader cr; /* Parent */
155 unsigned long case_idx; /* Case number of current case. */
158 int fd; /* File descriptor. */
159 off_t file_ofs; /* Current position in fd. */
160 off_t buffer_ofs; /* File offset of buffer start. */
161 union value *buffer; /* I/O buffer. */
162 size_t buffer_pos; /* Offset of buffer position. */
163 struct ccase c; /* Current case. */
167 static void io_error (struct fastfile *, const char *, ...)
168 PRINTF_FORMAT (2, 3);
169 static int safe_open (const char *file_name, int flags);
170 static int safe_close (int fd);
171 static void write_case_to_disk (struct fastfile *, const struct ccase *);
172 static void flush_buffer (struct fastfile *);
174 static void reader_open_file (struct fastfilereader *);
176 static void seek_and_fill_buffer (struct fastfilereader *);
177 static bool fill_buffer (struct fastfilereader *);
180 /* Number of bytes of case allocated in in-memory fastfiles. */
181 static size_t case_bytes;
183 /* Destroys READER. */
184 static void fastfilereader_destroy (struct casereader *cr)
186 struct fastfilereader *reader = (struct fastfilereader *) cr;
187 struct fastfile *ff = (struct fastfile *) casereader_get_casefile (cr);
189 if (ff->buffer == NULL)
190 ff->buffer = reader->buffer;
192 free (reader->buffer);
194 if (reader->fd != -1)
199 safe_close (reader->fd);
202 case_destroy (&reader->c);
209 /* Return the case number of the current case */
211 fastfilereader_cnum (const struct casereader *cr)
213 const struct fastfilereader *ffr = (const struct fastfilereader *) cr;
214 return ffr->case_idx;
218 /* Returns the next case pointed to by FFR and increments
219 FFR's pointer. Returns NULL if FFR points beyond the last case.
221 static struct ccase *
222 fastfilereader_get_next_case (struct casereader *cr)
224 struct fastfile *ff = (struct fastfile *) casereader_get_casefile (cr);
225 struct fastfilereader *ffr = (struct fastfilereader *) cr;
226 struct ccase *read_case = NULL ;
228 if ( ffr->case_idx >= ff->case_cnt )
231 if (ff->storage == MEMORY )
233 size_t block_idx = ffr->case_idx / CASES_PER_BLOCK;
234 size_t case_idx = ffr->case_idx % CASES_PER_BLOCK;
235 read_case = &ff->cases[block_idx][case_idx];
239 if (ffr->buffer_pos + ff->value_cnt > ff->buffer_size)
241 if (!fill_buffer (ffr))
246 case_from_values (&ffr->c, ffr->buffer + ffr->buffer_pos,
248 ffr->buffer_pos += ff->value_cnt;
257 /* Creates and returns a casereader for CF. A casereader can be used to
258 sequentially read the cases in a fastfile. */
259 static struct casereader *
260 fastfile_get_reader (const struct casefile *cf_)
262 struct casefile *cf = (struct casefile *) cf_;
263 struct fastfilereader *ffr = xzalloc (sizeof *ffr);
264 struct casereader *reader = (struct casereader *) ffr;
265 struct fastfile *ff = (struct fastfile *) cf;
267 assert (!cf->being_destroyed);
269 /* Flush the buffer to disk if it's not empty. */
270 if (ff->mode == WRITE && ff->storage == DISK)
275 casereader_register (cf, reader, &class_reader);
278 reader->destructive = 0;
282 case_nullify (&ffr->c);
284 if (ff->storage == DISK)
285 reader_open_file (ffr);
290 /* Returns the number of `union value's in a case for CF. */
292 fastfile_get_value_cnt (const struct casefile *cf)
294 const struct fastfile *ff = (const struct fastfile *) cf;
295 return ff->value_cnt;
298 /* Appends a copy of case C to fastfile CF. Not valid after any
299 reader for CF has been created.
300 Returns true if successful, false if an I/O error occurred. */
302 fastfile_append (struct casefile *cf, const struct ccase *c)
304 struct fastfile *ff = (struct fastfile *) cf;
305 assert (ff->mode == WRITE);
308 /* Try memory first. */
309 if (ff->storage == MEMORY)
311 if (case_bytes < get_workspace ())
313 size_t block_idx = ff->case_cnt / CASES_PER_BLOCK;
314 size_t case_idx = ff->case_cnt % CASES_PER_BLOCK;
315 struct ccase new_case;
317 case_bytes += ff->case_acct_size;
318 case_clone (&new_case, c);
321 if ((block_idx & (block_idx - 1)) == 0)
323 size_t block_cap = block_idx == 0 ? 1 : block_idx * 2;
324 ff->cases = xnrealloc (ff->cases,
325 block_cap, sizeof *ff->cases);
328 ff->cases[block_idx] = xnmalloc (CASES_PER_BLOCK,
332 case_move (&ff->cases[block_idx][case_idx], &new_case);
336 casefile_to_disk (cf);
337 assert (ff->storage == DISK);
338 write_case_to_disk (ff, c);
342 write_case_to_disk (ff, c);
349 /* Returns the number of cases in fastfile CF. */
351 fastfile_get_case_cnt (const struct casefile *cf)
353 const struct fastfile *ff = (const struct fastfile *) cf;
358 /* Returns true only if fastfile CF is stored in memory (instead of on
359 disk), false otherwise. */
361 fastfile_in_core (const struct casefile *cf)
363 const struct fastfile *ff = (const struct fastfile *) cf;
364 return (ff->storage == MEMORY);
368 /* If CF is currently stored in memory, writes it to disk. Readers, if any,
369 retain their current positions.
370 Returns true if successful, false if an I/O error occurred. */
372 fastfile_to_disk (const struct casefile *cf_)
374 struct fastfile *ff = (struct fastfile *) cf_;
375 struct casefile *cf = &ff->cf;
377 if (ff->storage == MEMORY)
379 size_t idx, block_cnt;
380 struct casereader *reader;
382 assert (ff->file_name == NULL);
383 assert (ff->fd == -1);
384 assert (ff->buffer_used == 0);
386 if (!make_temp_file (&ff->fd, &ff->file_name))
393 ff->buffer = xnmalloc (ff->buffer_size, sizeof *ff->buffer);
394 memset (ff->buffer, 0, ff->buffer_size * sizeof *ff->buffer);
396 case_bytes -= ff->case_cnt * ff->case_acct_size;
397 for (idx = 0; idx < ff->case_cnt; idx++)
399 size_t block_idx = idx / CASES_PER_BLOCK;
400 size_t case_idx = idx % CASES_PER_BLOCK;
401 struct ccase *c = &ff->cases[block_idx][case_idx];
402 write_case_to_disk (ff, c);
406 block_cnt = DIV_RND_UP (ff->case_cnt, CASES_PER_BLOCK);
407 for (idx = 0; idx < block_cnt; idx++)
408 free (ff->cases[idx]);
413 if (ff->mode == READ)
416 ll_for_each (reader, struct casereader, ll, &cf->reader_list)
417 reader_open_file ((struct fastfilereader *) reader);
423 /* Puts a fastfile to "sleep", that is, minimizes the resources
424 needed for it by closing its file descriptor and freeing its
425 buffer. This is useful if we need so many fastfiles that we
426 might not have enough memory and file descriptors to go
429 For simplicity, this implementation always converts the
430 fastfile to reader mode. If this turns out to be a problem,
431 with a little extra work we could also support sleeping
434 Returns true if successful, false if an I/O error occurred. */
436 fastfile_sleep (const struct casefile *cf_)
438 struct fastfile *ff = (struct fastfile *) cf_;
439 struct casefile *cf = &ff->cf;
441 fastfile_to_disk (cf);
449 if (ff->buffer != NULL)
459 /* Returns true if an I/O error has occurred in fastfile CF. */
461 fastfile_error (const struct casefile *cf)
463 const struct fastfile *ff = (const struct fastfile *) cf;
467 /* Destroys fastfile CF. */
469 fastfile_destroy (struct casefile *cf)
471 struct fastfile *ff = (struct fastfile *) cf;
475 if (ff->cases != NULL)
477 size_t idx, block_cnt;
479 case_bytes -= ff->case_cnt * ff->case_acct_size;
480 for (idx = 0; idx < ff->case_cnt; idx++)
482 size_t block_idx = idx / CASES_PER_BLOCK;
483 size_t case_idx = idx % CASES_PER_BLOCK;
484 struct ccase *c = &ff->cases[block_idx][case_idx];
488 block_cnt = DIV_RND_UP (ff->case_cnt, CASES_PER_BLOCK);
489 for (idx = 0; idx < block_cnt; idx++)
490 free (ff->cases[idx]);
498 if (ff->file_name != NULL && remove (ff->file_name) == -1)
499 io_error (ff, _("%s: Removing temporary file: %s."),
500 ff->file_name, strerror (errno));
501 free (ff->file_name);
510 /* Creates and returns a fastfile to store cases of VALUE_CNT
511 `union value's each. */
513 fastfile_create (size_t value_cnt)
515 struct fastfile *ff = xzalloc (sizeof *ff);
516 struct casefile *cf = &ff->cf;
518 casefile_register (cf, &class);
520 ff->value_cnt = value_cnt;
521 ff->case_acct_size = (ff->value_cnt + 4) * sizeof *ff->buffer;
523 ff->storage = MEMORY;
525 cf->being_destroyed = false;
529 ff->file_name = NULL;
531 ff->buffer_size = ROUND_UP (ff->value_cnt, IO_BUF_SIZE);
532 if (ff->value_cnt > 0 && ff->buffer_size % ff->value_cnt > 64)
533 ff->buffer_size = ff->value_cnt;
541 /* Marks FF as having encountered an I/O error.
542 If this is the first error on CF, reports FORMAT to the user,
543 doing printf()-style substitutions. */
545 io_error (struct fastfile *ff, const char *format, ...)
552 m.category = MSG_GENERAL;
553 m.severity = MSG_ERROR;
554 m.where.file_name = NULL;
555 m.where.line_number = -1;
556 va_start (args, format);
557 m.text = xvasprintf (format, args);
565 /* Calls open(), passing FILE_NAME and FLAGS, repeating as necessary
566 to deal with interrupted calls. */
568 safe_open (const char *file_name, int flags)
574 fd = open (file_name, flags);
576 while (fd == -1 && errno == EINTR);
581 /* Calls close(), passing FD, repeating as necessary to deal with
582 interrupted calls. */
592 while (retval == -1 && errno == EINTR);
598 /* Writes case C to fastfile CF's disk buffer, first flushing the buffer to
599 disk if it would otherwise overflow.
600 Returns true if successful, false if an I/O error occurred. */
602 write_case_to_disk (struct fastfile *ff, const struct ccase *c)
607 case_to_values (c, ff->buffer + ff->buffer_used, ff->value_cnt);
608 ff->buffer_used += ff->value_cnt;
609 if (ff->buffer_used + ff->value_cnt > ff->buffer_size)
614 /* If any bytes in FF's output buffer are used, flush them to
617 flush_buffer (struct fastfile *ff)
619 if (ff->ok && ff->buffer_used > 0)
621 if (!full_write (ff->fd, ff->buffer,
622 ff->buffer_size * sizeof *ff->buffer))
623 io_error (ff, _("Error writing temporary file: %s."),
630 /* Opens a disk file for READER and seeks to the current position as indicated
631 by case_idx. Normally the current position is the beginning of the file,
632 but fastfile_to_disk may cause the file to be opened at a different
635 reader_open_file (struct fastfilereader *reader)
637 struct casefile *cf = casereader_get_casefile(&reader->cr);
638 struct fastfile *ff = (struct fastfile *) cf;
639 if (!ff->ok || reader->case_idx >= ff->case_cnt)
649 reader->fd = safe_open (ff->file_name, O_RDONLY);
651 io_error (ff, _("%s: Opening temporary file: %s."),
652 ff->file_name, strerror (errno));
655 if (ff->buffer != NULL)
657 reader->buffer = ff->buffer;
662 reader->buffer = xnmalloc (ff->buffer_size, sizeof *ff->buffer);
663 memset (reader->buffer, 0, ff->buffer_size * sizeof *ff->buffer);
666 case_create (&reader->c, ff->value_cnt);
668 reader->buffer_ofs = -1;
669 reader->file_ofs = -1;
670 seek_and_fill_buffer (reader);
673 /* Seeks the backing file for READER to the proper position and
674 refreshes the buffer contents. */
676 seek_and_fill_buffer (struct fastfilereader *reader)
678 struct casefile *cf = casereader_get_casefile(&reader->cr);
679 struct fastfile *ff = (struct fastfile *) cf;
682 if (ff->value_cnt != 0)
684 size_t buffer_case_cnt = ff->buffer_size / ff->value_cnt;
685 new_ofs = ((off_t) reader->case_idx / buffer_case_cnt
686 * ff->buffer_size * sizeof *ff->buffer);
687 reader->buffer_pos = (reader->case_idx % buffer_case_cnt
692 if (new_ofs != reader->file_ofs)
694 if (lseek (reader->fd, new_ofs, SEEK_SET) != new_ofs)
695 io_error (ff, _("%s: Seeking temporary file: %s."),
696 ff->file_name, strerror (errno));
698 reader->file_ofs = new_ofs;
701 if (ff->case_cnt > 0 && ff->value_cnt > 0 && reader->buffer_ofs != new_ofs)
702 fill_buffer (reader);
705 /* Fills READER's buffer by reading a block from disk. */
707 fill_buffer (struct fastfilereader *reader)
709 struct casefile *cf = casereader_get_casefile(&reader->cr);
710 struct fastfile *ff = (struct fastfile *) cf;
713 int bytes = full_read (reader->fd, reader->buffer,
715 sizeof *reader->buffer);
717 io_error (ff, _("%s: Reading temporary file: %s."),
718 ff->file_name, strerror (errno));
719 else if (bytes != ff->buffer_size * sizeof *reader->buffer)
720 io_error (ff, _("%s: Temporary file ended unexpectedly."),
724 reader->buffer_ofs = reader->file_ofs;
725 reader->file_ofs += bytes;
731 static const struct class_casefile class =
735 fastfile_get_value_cnt,
736 fastfile_get_case_cnt,
746 static const struct class_casereader class_reader =
748 fastfilereader_get_next_case,
750 fastfilereader_destroy,