From: Ben Pfaff Date: Sun, 4 Feb 2018 21:09:07 +0000 (-0800) Subject: fbuf: New data structure for buffered file I/O. X-Git-Url: https://pintos-os.org/cgi-bin/gitweb.cgi?p=pspp;a=commitdiff_plain;h=7344ce41487d4294726d597b99bbef5c3cfa1ce6 fbuf: New data structure for buffered file I/O. This is a substitute for stdio that allows the client to portably work with memory buffers in addition to files. John Darrington suggested that this could be useful in the GUI. --- diff --git a/src/data/any-reader.c b/src/data/any-reader.c index ff7f4ab6c9..0b52a6956f 100644 --- a/src/data/any-reader.c +++ b/src/data/any-reader.c @@ -66,7 +66,7 @@ any_reader_detect (const struct file_handle *file_handle, if (classp) *classp = NULL; - file = fn_open (file_handle, "rb"); + file = fn_fopen (file_handle, "rb"); if (file == NULL) { msg (ME, _("An error occurred while opening `%s': %s."), diff --git a/src/data/csv-file-writer.c b/src/data/csv-file-writer.c index c01cb34231..94338c2bcf 100644 --- a/src/data/csv-file-writer.c +++ b/src/data/csv-file-writer.c @@ -153,7 +153,7 @@ csv_writer_open (struct file_handle *fh, const struct dictionary *dict, goto error; /* Create the file on disk. 
*/ - w->rf = replace_file_start (fh, "w", 0666, &w->file); + w->rf = replace_file_start (fh, false, 0666, &w->file); if (w->rf == NULL) { msg (ME, _("Error opening `%s' for writing as a system file: %s."), diff --git a/src/data/encrypted-file.c b/src/data/encrypted-file.c index f1074b4c1d..17b95343c8 100644 --- a/src/data/encrypted-file.c +++ b/src/data/encrypted-file.c @@ -70,7 +70,7 @@ encrypted_file_open (struct encrypted_file **fp, const struct file_handle *fh) f = xmalloc (sizeof *f); f->error = 0; - f->file = fn_open (fh, "rb"); + f->file = fn_fopen (fh, "rb"); if (f->file == NULL) { msg (ME, _("An error occurred while opening `%s': %s."), diff --git a/src/data/file-name.c b/src/data/file-name.c index afc99a8825..edbabd5ed8 100644 --- a/src/data/file-name.c +++ b/src/data/file-name.c @@ -142,7 +142,7 @@ safety_violation (const char *fn) NULL on failure. If NULL is returned then errno is set to a sensible value. */ FILE * -fn_open (const struct file_handle *fh, const char *mode) +fn_fopen (const struct file_handle *fh, const char *mode) { const char *fn = fh_get_file_name (fh); @@ -204,7 +204,46 @@ fn_open (const struct file_handle *fh, const char *mode) #endif } -/* Counterpart to fn_open that closes file F with name FN; returns 0 +/* File open routine that understands `-' as stdin/stdout. Returns file + descriptor on success, otherwise a negative errno value. */ +int +fn_open (const struct file_handle *fh, int flags, mode_t mode) +{ + const char *fn = fh_get_file_name (fh); + + int orig_fd = -1; + if ((flags & O_ACCMODE) == O_RDONLY) + { + if (!strcmp (fn, "stdin") || !strcmp (fn, "-")) + orig_fd = STDIN_FILENO; + } + else + { + if (!strcmp (fn, "stdout") || !strcmp (fn, "-")) + orig_fd = STDOUT_FILENO; + else if (!strcmp (fn, "stderr")) + orig_fd = STDERR_FILENO; + } + if (orig_fd >= 0) + { + int fd = dup (orig_fd); + return fd >= 0 ? 
fd : -errno; + } + +#if WIN32 + wchar_t *ss = convert_to_filename_encoding (fn, strlen (fn), fh_get_file_name_encoding (fh)); + int fd = _wopen (ss, flags, mode); /* Open by the converted wide-character name; MODE is a permission mask, so nothing else needs recoding. */ + int saved_errno = errno; + free (ss); + errno = saved_errno; /* free() must not disturb the errno negated below. */ +#else + int fd = open (fn, flags, mode); +#endif + + return fd >= 0 ? fd : -errno; +} + +/* Counterpart to fn_fopen that closes file F with name FN; returns 0 on success, EOF on failure. If EOF is returned, errno is set to a sensible value. */ int diff --git a/src/data/file-name.h b/src/data/file-name.h index b4eee83e76..23c2e9da97 100644 --- a/src/data/file-name.h +++ b/src/data/file-name.h @@ -29,8 +29,10 @@ char *fn_extension (const struct file_handle *); bool fn_exists (const struct file_handle *); -FILE *fn_open (const struct file_handle *fn, const char *mode); -int fn_close (const struct file_handle *fn, FILE *file); +FILE *fn_fopen (const struct file_handle *, const char *mode); +int fn_close (const struct file_handle *, FILE *file); + +int fn_open (const struct file_handle *, int flags, mode_t mode); const char * default_output_path (void); diff --git a/src/data/make-file.c b/src/data/make-file.c index 78875746f2..a5b68051ac 100644 --- a/src/data/make-file.c +++ b/src/data/make-file.c @@ -193,13 +193,12 @@ static void free_replace_file (struct replace_file *); static void unlink_replace_files (void); struct replace_file * -replace_file_start (const struct file_handle *fh, const char *mode, - mode_t permissions, FILE **fp) +replace_file_start_fd (const struct file_handle *fh, + bool binary, mode_t permissions, int *fd) { static bool registered; struct TS_stat s; struct replace_file *rf; - int fd; int saved_errno = errno; const char *file_name = fh_get_file_name (fh); @@ -211,8 +210,8 @@ replace_file_start (const struct file_handle *fh, const char *mode, if (Tstat (Tfile_name, &s) == 0 && !S_ISREG (s.st_mode)) { /* Open file descriptor. 
*/ - fd = Topen (Tfile_name, O_WRONLY); - if (fd < 0) + *fd = Topen (Tfile_name, O_WRONLY); + if (*fd < 0) { saved_errno = errno; msg (ME, _("Opening %s for writing: %s."), @@ -221,18 +220,6 @@ replace_file_start (const struct file_handle *fh, const char *mode, return NULL; } - /* Open file as stream. */ - *fp = fdopen (fd, mode); - if (*fp == NULL) - { - saved_errno = errno; - msg (ME, _("Opening stream for %s: %s."), - file_name, strerror (saved_errno)); - close (fd); - free (Tfile_name); - return NULL; - } - rf = xzalloc (sizeof *rf); rf->file_name = NULL; rf->tmp_name = Tfile_name; @@ -266,8 +253,9 @@ replace_file_start (const struct file_handle *fh, const char *mode, rf->tmp_name = convert_to_filename_encoding (rf->tmp_name_verbatim, strlen (rf->tmp_name_verbatim), fh_get_file_name_encoding (fh)); /* Create file by that name. */ - fd = Topen (rf->tmp_name, O_WRONLY | O_CREAT | O_EXCL | O_BINARY, permissions); - if (fd >= 0) + int flags = O_WRONLY | O_CREAT | O_EXCL | (binary ? O_BINARY : O_TEXT); + *fd = Topen (rf->tmp_name, flags, permissions); + if (*fd >= 0) break; if (errno != EEXIST) { @@ -279,29 +267,50 @@ replace_file_start (const struct file_handle *fh, const char *mode, } + /* Register file for deletion. */ + ll_push_head (&all_files, &rf->ll); + unblock_fatal_signals (); + + return rf; + + error: + unblock_fatal_signals (); + free_replace_file (rf); + *fd = -1; + errno = saved_errno; + return NULL; +} + +struct replace_file * +replace_file_start (const struct file_handle *fh, bool binary, + mode_t permissions, FILE **fp) +{ + struct replace_file *rf; + int fd; + + /* Open fd. */ + rf = replace_file_start_fd (fh, binary, permissions, &fd); + if (!rf) + goto error; + /* Open file as stream. */ - *fp = fdopen (fd, mode); + *fp = fdopen (fd, binary ? 
"wb" : "w"); if (*fp == NULL) { - saved_errno = errno; + int error = errno; msg (ME, _("Opening stream for temporary file %s: %s."), - rf->tmp_name_verbatim, strerror (saved_errno)); + rf->tmp_name_verbatim, strerror (error)); close (fd); - Tunlink (rf->tmp_name); + replace_file_abort (rf); + errno = error; + goto error; } - /* Register file for deletion. */ - ll_push_head (&all_files, &rf->ll); - unblock_fatal_signals (); - return rf; - error: - unblock_fatal_signals (); - free_replace_file (rf); +error: *fp = NULL; - errno = saved_errno; return NULL; } diff --git a/src/data/make-file.h b/src/data/make-file.h index a2bcc76eb1..d908db3366 100644 --- a/src/data/make-file.h +++ b/src/data/make-file.h @@ -23,25 +23,29 @@ struct file_handle; -/* Prepares to atomically replace a (potentially) existing file - by a new file., by creating a temporary file with the given - PERMISSIONS bits. +/* Prepares to atomically replace a (potentially) existing file by a new file, + by creating a temporary file with the given PERMISSIONS bits. Special files are an exception: they are not atomically replaced but simply opened for writing. - If successful, stores a stream for it opened according to MODE (which should be - "w" or "wb") in *FP. Returns a ticket that can be used to - commit or abort the file replacement. If neither action has - yet been taken, program termination via signal will cause - all resources to be released. The return value must not be - explicitly freed. + If successful, returns a ticket that can be used to commit or abort the file + replacement. If neither action is taken, program termination via signal + will abort. Depending on the function, stores a file descriptor in *FD or a + stream in *FP for the newly opened file. The descriptor or stream is opened + for writing a binary file if BINARY is true, otherwise a text file (this + disctinction only matters on Windows). 
- The caller is responsible for closing *FP */ + On error, returns NULL and stores NULL in *FP or -1 in *FD. + + The caller is responsible for closing *FP or *FD. */ struct replace_file *replace_file_start (const struct file_handle *fh, - const char *mode, mode_t permissions, + bool binary, mode_t permissions, FILE **fp); +struct replace_file *replace_file_start_fd (const struct file_handle *fh, + bool binary, + mode_t permissions, int *fd); /* Commits or aborts the replacement of a (potentially) existing file by a new file, using the ticket returned by diff --git a/src/data/pc+-file-reader.c b/src/data/pc+-file-reader.c index cc80cd723b..33b2bfe0e9 100644 --- a/src/data/pc+-file-reader.c +++ b/src/data/pc+-file-reader.c @@ -209,7 +209,7 @@ pcp_open (struct file_handle *fh) goto error; /* Open file. */ - r->file = fn_open (fh, "rb"); + r->file = fn_fopen (fh, "rb"); if (r->file == NULL) { msg (ME, _("Error opening `%s' for reading as an SPSS/PC+ " diff --git a/src/data/por-file-reader.c b/src/data/por-file-reader.c index 15a3b7902e..7f6f7e6b73 100644 --- a/src/data/por-file-reader.c +++ b/src/data/por-file-reader.c @@ -288,7 +288,7 @@ pfm_open (struct file_handle *fh) goto error; /* Open file. 
*/ - r->file = fn_open (r->fh, "rb"); + r->file = fn_fopen (r->fh, "rb"); if (r->file == NULL) { msg (ME, _("An error occurred while opening `%s' for reading " diff --git a/src/data/por-file-writer.c b/src/data/por-file-writer.c index 4b25d38f31..1853ad3a56 100644 --- a/src/data/por-file-writer.c +++ b/src/data/por-file-writer.c @@ -152,8 +152,7 @@ pfm_open_writer (struct file_handle *fh, struct dictionary *dict, mode = 0444; if (opts.create_writeable) mode |= 0222; - w->rf = replace_file_start (fh, "w", mode, - &w->file); + w->rf = replace_file_start (fh, false, mode, &w->file); if (w->rf == NULL) { msg (ME, _("Error opening `%s' for writing as a portable file: %s."), diff --git a/src/data/sys-file-reader.c b/src/data/sys-file-reader.c index b2db755732..c0a7d4fc99 100644 --- a/src/data/sys-file-reader.c +++ b/src/data/sys-file-reader.c @@ -19,6 +19,7 @@ #include "data/sys-file-private.h" #include +#include #include #include #include @@ -44,6 +45,7 @@ #include "libpspp/array.h" #include "libpspp/assertion.h" #include "libpspp/compiler.h" +#include "libpspp/fbuf.h" #include "libpspp/i18n.h" #include "libpspp/ll.h" #include "libpspp/message.h" @@ -191,7 +193,7 @@ struct sfm_reader /* File state. */ struct file_handle *fh; /* File handle. */ struct fh_lock *lock; /* Mutual exclusion for file handle. */ - FILE *file; /* File stream. */ + struct fbuf *fbuf; /* File stream. */ off_t pos; /* Position in file. */ bool error; /* I/O or corruption error? */ struct caseproto *proto; /* Format of output cases. 
*/ @@ -412,13 +414,14 @@ sfm_open (struct file_handle *fh) if (r->lock == NULL) goto error; - r->file = fn_open (fh, "rb"); - if (r->file == NULL) + int fd = fn_open (fh, O_RDONLY | O_BINARY, 0); + if (fd < 0) { msg (ME, _("Error opening `%s' for reading as a system file: %s."), fh_get_file_name (r->fh), strerror (errno)); goto error; } + r->fbuf = fbuf_open_fd (fd); if (!read_dictionary (r)) goto error; @@ -906,15 +909,16 @@ sfm_close (struct any_reader *r_) struct sfm_reader *r = sfm_reader_cast (r_); bool error; - if (r->file) + if (r->fbuf) { - if (fn_close (r->fh, r->file) == EOF) + int error = fbuf_close (r->fbuf); + if (error) { msg (ME, _("Error closing system file `%s': %s."), - fh_get_file_name (r->fh), strerror (errno)); + fh_get_file_name (r->fh), strerror (error)); r->error = true; } - r->file = NULL; + r->fbuf = NULL; } any_read_info_destroy (&r->info); @@ -3245,11 +3249,13 @@ static inline int read_bytes_internal (struct sfm_reader *r, bool eof_is_ok, void *buf, size_t byte_cnt) { - size_t bytes_read = fread (buf, 1, byte_cnt, r->file); + size_t bytes_read = fbuf_read (r->fbuf, buf, byte_cnt); r->pos += bytes_read; if (bytes_read == byte_cnt) return 1; - else if (ferror (r->file)) + + int status = fbuf_get_status (r->fbuf); + if (status > 0) { sys_error (r, r->pos, _("System error: %s."), strerror (errno)); return -1; @@ -3480,9 +3486,10 @@ read_zheader (struct sfm_reader *r) static void seek (struct sfm_reader *r, off_t offset) { - if (fseeko (r->file, offset, SEEK_SET)) + int error = fbuf_seek (r->fbuf, offset); + if (error) sys_error (r, 0, _("%s: seek failed (%s)."), - fh_get_file_name (r->fh), strerror (errno)); + fh_get_file_name (r->fh), strerror (error)); r->pos = offset; } @@ -3500,26 +3507,26 @@ read_ztrailer (struct sfm_reader *r, unsigned int block_size; unsigned int n_blocks; unsigned int i; - struct stat s; - if (fstat (fileno (r->file), &s)) + int seekable = fbuf_is_seekable (r->fbuf); + if (seekable < 0) { sys_error (r, 0, _("%s: stat 
failed (%s)."), - fh_get_file_name (r->fh), strerror (errno)); + fh_get_file_name (r->fh), strerror (-seekable)); return false; } - - if (!S_ISREG (s.st_mode)) + else if (!seekable) { /* We can't seek to the trailer and then back to the data in this file, so skip doing extra checks. */ return true; } - if (r->ztrailer_ofs + ztrailer_len != s.st_size) + off_t size = fbuf_get_size (r->fbuf); + if (size >= 0 && r->ztrailer_ofs + ztrailer_len != size) sys_warn (r, r->pos, _("End of ZLIB trailer (0x%llx) is not file size (0x%llx)."), - r->ztrailer_ofs + ztrailer_len, (long long int) s.st_size); + r->ztrailer_ofs + ztrailer_len, (long long int) size); seek (r, r->ztrailer_ofs); diff --git a/src/data/sys-file-writer.c b/src/data/sys-file-writer.c index df5108e2a0..af0ead79b3 100644 --- a/src/data/sys-file-writer.c +++ b/src/data/sys-file-writer.c @@ -41,6 +41,7 @@ #include "data/short-names.h" #include "data/value-labels.h" #include "data/variable.h" +#include "libpspp/fbuf.h" #include "libpspp/float-format.h" #include "libpspp/i18n.h" #include "libpspp/integer-format.h" @@ -69,7 +70,7 @@ struct sfm_writer { struct file_handle *fh; /* File handle. */ struct fh_lock *lock; /* Mutual exclusion for file. */ - FILE *file; /* File stream. */ + struct fbuf *fbuf; /* File stream. */ struct replace_file *rf; /* Ticket for replacing output file. 
*/ enum any_compression compression; @@ -216,7 +217,7 @@ sfm_open_writer (struct file_handle *fh, struct dictionary *d, w = xzalloc (sizeof *w); w->fh = fh_ref (fh); w->lock = NULL; - w->file = NULL; + w->fbuf = NULL; w->rf = NULL; /* Use the requested compression, except that no EBCDIC-based ZLIB compressed @@ -250,13 +251,16 @@ sfm_open_writer (struct file_handle *fh, struct dictionary *d, mode = 0444; if (opts.create_writeable) mode |= 0222; - w->rf = replace_file_start (fh, "wb", mode, &w->file); + + int fd; + w->rf = replace_file_start_fd (fh, true, mode, &fd); if (w->rf == NULL) { msg (ME, _("Error opening `%s' for writing as a system file: %s."), fh_get_file_name (fh), strerror (errno)); goto error; } + w->fbuf = fbuf_open_fd (fd); get_encoding_info (&encoding_info, dict_get_encoding (d)); w->space = encoding_info.space[0]; @@ -309,7 +313,7 @@ sfm_open_writer (struct file_handle *fh, struct dictionary *d, w->zstream.zalloc = Z_NULL; w->zstream.zfree = Z_NULL; w->zstream.opaque = Z_NULL; - w->zstart = ftello (w->file); + w->zstart = fbuf_tell (w->fbuf); write_int64 (w, w->zstart); write_int64 (w, 0); @@ -985,7 +989,7 @@ write_long_string_value_labels (struct sfm_writer *w, write_int (w, 1); /* Data item (byte) size. */ write_int (w, size); /* Number of data items. */ - start = ftello (w->file); + start = fbuf_tell (w->fbuf); for (i = 0; i < n_vars; i++) { struct variable *var = dict_get_var (dict, i); @@ -1022,7 +1026,7 @@ write_long_string_value_labels (struct sfm_writer *w, free (label); } } - assert (ftello (w->file) == start + size); + assert (fbuf_tell (w->fbuf) == start + size); } static void @@ -1058,7 +1062,7 @@ write_long_string_missing_values (struct sfm_writer *w, write_int (w, 1); /* Data item (byte) size. */ write_int (w, size); /* Number of data items. 
*/ - start = ftello (w->file); + start = fbuf_tell (w->fbuf); for (i = 0; i < n_vars; i++) { struct variable *var = dict_get_var (dict, i); @@ -1087,7 +1091,7 @@ write_long_string_missing_values (struct sfm_writer *w, write_bytes (w, value_str (value, width), 8); } } - assert (ftello (w->file) == start + size); + assert (fbuf_tell (w->fbuf) == start + size); } static void @@ -1205,7 +1209,7 @@ sys_file_casewriter_write (struct casewriter *writer, void *w_, { struct sfm_writer *w = w_; - if (ferror (w->file)) + if (fbuf_get_status (w->fbuf) > 0) { casewriter_force_error (writer); case_unref (c); @@ -1235,7 +1239,7 @@ sys_file_casewriter_destroy (struct casewriter *writer, void *w_) static bool write_error (const struct sfm_writer *writer) { - return ferror (writer->file); + return fbuf_get_status (writer->fbuf) > 0; } /* Closes a system file after we're done with it. @@ -1249,7 +1253,7 @@ close_writer (struct sfm_writer *w) return true; ok = true; - if (w->file != NULL) + if (w->fbuf != NULL) { /* Flush buffer. */ flush_compressed (w); @@ -1258,20 +1262,20 @@ close_writer (struct sfm_writer *w) finish_zstream (w); write_ztrailer (w); } - fflush (w->file); + fbuf_flush (w->fbuf); ok = !write_error (w); /* Seek back to the beginning and update the number of cases. This is just a courtesy to later readers, so there's no need to check return values or report errors. 
*/ - if (ok && w->case_cnt <= INT32_MAX && !fseeko (w->file, 80, SEEK_SET)) + if (ok && w->case_cnt <= INT32_MAX && !fbuf_seek (w->fbuf, 80)) { write_int (w, w->case_cnt); - clearerr (w->file); + fbuf_clear_status (w->fbuf); } - if (fclose (w->file) == EOF) + if (fbuf_close (w->fbuf) != 0) ok = false; if (!ok) @@ -1489,7 +1493,7 @@ write_ztrailer (struct sfm_writer *w) compressed_ofs += block->compressed_size; } - if (!fseeko (w->file, w->zstart + 8, SEEK_SET)) + if (!fbuf_seek (w->fbuf, w->zstart + 8)) { write_int64 (w, compressed_ofs); write_int64 (w, 24 + (w->n_blocks * 24)); @@ -1613,7 +1617,7 @@ write_string (struct sfm_writer *w, const char *string, size_t width) size_t pad_bytes = width - data_bytes; write_bytes (w, string, data_bytes); while (pad_bytes-- > 0) - putc (w->space, w->file); + fbuf_putc (w->fbuf, w->space); } /* Recodes null-terminated UTF-8 encoded STRING into ENCODING, and writes the @@ -1658,7 +1662,7 @@ write_string_record (struct sfm_writer *w, static void write_bytes (struct sfm_writer *w, const void *data, size_t size) { - fwrite (data, 1, size, w->file); + fbuf_write (w->fbuf, data, size); } /* Writes N zeros to W's output file. */ @@ -1666,7 +1670,7 @@ static void write_zeros (struct sfm_writer *w, size_t n) { while (n-- > 0) - putc (0, w->file); + fbuf_putc (w->fbuf, 0); } /* Writes N spaces to W's output file. 
*/ @@ -1674,5 +1678,5 @@ static void write_spaces (struct sfm_writer *w, size_t n) { while (n-- > 0) - putc (w->space, w->file); + fbuf_putc (w->fbuf, w->space); } diff --git a/src/language/data-io/data-reader.c b/src/language/data-io/data-reader.c index 1e06d28773..c31074e79d 100644 --- a/src/language/data-io/data-reader.c +++ b/src/language/data-io/data-reader.c @@ -154,7 +154,7 @@ dfm_open_reader (struct file_handle *fh, struct lexer *lexer, if (fh_get_referent (fh) != FH_REF_INLINE) { r->line_number = 0; - r->file = fn_open (fh, "rb"); + r->file = fn_fopen (fh, "rb"); if (r->file == NULL) { msg (ME, _("Could not open `%s' for reading as a data file: %s."), diff --git a/src/language/data-io/data-writer.c b/src/language/data-io/data-writer.c index bf9505e7dd..2e03218f69 100644 --- a/src/language/data-io/data-writer.c +++ b/src/language/data-io/data-writer.c @@ -92,7 +92,7 @@ dfm_open_writer (struct file_handle *fh, const char *encoding) w = xmalloc (sizeof *w); w->fh = fh_ref (fh); w->lock = lock; - w->rf = replace_file_start (w->fh, "wb", 0666, &w->file); + w->rf = replace_file_start (w->fh, true, 0666, &w->file); w->encoding = xstrdup (encoding); w->line_ends = fh_get_line_ends (fh); w->unit = ei.unit; diff --git a/src/libpspp/automake.mk b/src/libpspp/automake.mk index b68fce0d52..81f8f1fcf3 100644 --- a/src/libpspp/automake.mk +++ b/src/libpspp/automake.mk @@ -42,6 +42,8 @@ src_libpspp_liblibpspp_la_SOURCES = \ src/libpspp/encoding-guesser.h \ src/libpspp/ext-array.c \ src/libpspp/ext-array.h \ + src/libpspp/fbuf.c \ + src/libpspp/fbuf.h \ src/libpspp/float-format.c \ src/libpspp/float-format.h \ src/libpspp/freaderror.c \ diff --git a/src/libpspp/fbuf.c b/src/libpspp/fbuf.c new file mode 100644 index 0000000000..a3758cebd2 --- /dev/null +++ b/src/libpspp/fbuf.c @@ -0,0 +1,553 @@ +/* PSPP - a program for statistical analysis. + Copyright (C) 2017 Free Software Foundation, Inc. 
+ + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see . */ + +#include + +#include "fbuf.h" + +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "libpspp/assertion.h" +#include "libpspp/cast.h" + +#include "gl/intprops.h" +#include "gl/minmax.h" +#include "gl/xalloc.h" +#include "gl/xsize.h" + +#define FBUF_SIZE 4096 + +struct fbuf_class + { + int (*close) (struct fbuf *); + + /* Reads up to N bytes from FBUF's underlying file descriptor into BUFFER. + Returns the number of bytes read, if successful, zero at end of file, or + a negative errno value on error. */ + int (*read) (struct fbuf *fbuf, void *buffer, size_t n); + + /* Writes the N bytes in BUFFER to FBUF's underlying file descriptor. The + * caller guarantees N > 0. Returns the number of bytes written, if + * successful, otherwise a negative errno value. */ + int (*write) (struct fbuf *fbuf, const void *buffer, size_t n); + + /* Seeks to byte offset OFFSET in FBUF's underlying file descriptor. + Returns 0 if successful, otherwise a positive errno value. Returns + -ESPIPE if FBUF does not support positioning. */ + int (*seek) (struct fbuf *fbuf, off_t offset); + + /* Returns the current byte offset in FBUF's underlying file descriptor, or + a negative errno value on error. Returns -ESPIPE + if FBUF does not support positioning. 
*/ + off_t (*tell) (struct fbuf *fbuf); + + /* Returns the size of the file underlying FBUF, in bytes, or a negative + errno value on error. Returns -ESPIPE if FBUF does not support + positioning. */ + off_t (*get_size) (struct fbuf *fbuf); + }; + +struct fbuf_fd + { + struct fbuf up; + int fd; + }; + +static void +fbuf_init (struct fbuf *fbuf, const struct fbuf_class *class, off_t offset) +{ + memset (fbuf, 0, sizeof *fbuf); + fbuf->class = class; + fbuf->buffer = xmalloc (FBUF_SIZE); + fbuf->offset = offset >= 0 ? offset : TYPE_MINIMUM (off_t); +} + +/* Closes FBUF. Returns 0 if successful, otherwise a positive errno value that + represents an error reading or writing the underlying fd (which could have + happened earlier or as part of the final flush implied by closing). */ +int +fbuf_close (struct fbuf *fbuf) +{ + if (!fbuf) + return 0; + + fbuf_flush (fbuf); + int status = fbuf->status; + int error = fbuf->class->close (fbuf); + return status ? status : error; +} + +/* Returns FBUF's error status, which is 0 if no error has been recorded and + otherwise a positive errno value. The error, if any, reflects difficulty + reading or writing the underlying fd. */ +int +fbuf_get_status (const struct fbuf *fbuf) +{ + return fbuf->status; +} + +/* Clears any previously recorded error status. */ +void +fbuf_clear_status (struct fbuf *fbuf) +{ + fbuf->status = 0; +} + +/* Returns the length of the file backing FBUF, in bytes, or a negative errno + value on error. A return value of -ESPIPE indicates that the underlying + file is not seekable, i.e. does not have a length. */ +off_t +fbuf_get_size (const struct fbuf *fbuf_) +{ + struct fbuf *fbuf = CONST_CAST (struct fbuf *, fbuf_); + return fbuf->class->get_size (fbuf); +} + +/* Returns true if FBUF is seekable, false otherwise. */ +int +fbuf_is_seekable (const struct fbuf *fbuf) +{ + return fbuf_tell (fbuf) != -ESPIPE; +} + +/* Attempts to flush any data buffered for writing to the underlying file. 
+ Returns 0 if successful (which includes the case where FBUF is not in write + mode) or a positive errno value if there is a write error. */ +int +fbuf_flush (struct fbuf *fbuf) +{ + for (;;) + { + assert (fbuf->write_tail <= fbuf->write_head); + int n = fbuf->write_head - fbuf->write_tail; + if (n <= 0) + return 0; + + int retval = fbuf->class->write (fbuf, fbuf->write_tail, n); + if (retval <= 0) /* A zero-byte write makes no progress, so report it as EIO instead of retrying forever. */ + { + fbuf->status = retval < 0 ? -retval : EIO; + return fbuf->status; + } + + fbuf->write_tail += retval; /* Advance only past what was actually written; a short write leaves the rest for the next pass. */ + if (fbuf->offset >= 0) + fbuf->offset += retval; + if (fbuf->write_tail >= fbuf->write_head) + { + fbuf->write_tail = fbuf->write_head = fbuf->buffer; + return 0; + } + } +} + +/* Returns the byte offset in FBUF's file of the next byte to be read or + written, or a negative errno value if the offset cannot be determined. + Returns -ESPIPE if the underlying file is not seekable. */ +off_t +fbuf_tell (const struct fbuf *fbuf_) +{ + struct fbuf *fbuf = CONST_CAST (struct fbuf *, fbuf_); + + if (fbuf->offset < 0) + { + if (fbuf->offset != -ESPIPE) + fbuf->offset = fbuf->class->tell (fbuf); + + if (fbuf->offset < 0) + return fbuf->offset; + } + + return (fbuf->offset + - (fbuf->read_head - fbuf->read_tail) + + (fbuf->write_head - fbuf->write_tail)); +} + +/* Attempts to seek in FBUF such that the next byte to be read or written will + be at byte offset OFFSET. Returns 0 if successful or a positive errno value + otherwise. Returns ESPIPE if the underlying file is not seekable. */ +int +fbuf_seek (struct fbuf *fbuf, off_t offset) +{ + if (offset < 0) + return EINVAL; + + int error = fbuf_flush (fbuf); + if (error) + return error; + + fbuf->read_tail = fbuf->read_head = NULL; + fbuf->write_tail = fbuf->write_head = fbuf->write_end = NULL; + + error = fbuf->class->seek (fbuf, offset); + if (!error) + fbuf->offset = offset; + return error; +} + +/* Attempts to write the SIZE bytes of data in DATA to FBUF. 
On success, + returns the number of bytes actually written (possibly less than SIZE), and + on failure returns a negative errno value. Returns 0 only if SIZE is 0. + + If the last I/O operation on FBUF was a read, the caller must call + fbuf_seek() before this function. */ +ssize_t +fbuf_write (struct fbuf *fbuf, const void *data_, size_t size) +{ + const uint8_t *data = data_; + size_t n_written = 0; + while (size > 0) + { + size_t avail = fbuf->write_end - fbuf->write_head; + size_t chunk = MIN (avail, size); + if (chunk) + { + if (chunk < FBUF_SIZE) + { + /* Normal case: copy into buffer. */ + memcpy (fbuf->write_head, data, chunk); + fbuf->write_head += chunk; + } + else + { + /* Buffer is empty and we're writing more data than will fit in + the buffer. Skip the buffer. */ + chunk = MIN (INT_MAX, size); + int retval = fbuf->class->write (fbuf, data, chunk); + if (retval <= 0) + { + /* Record the failure so that fbuf_get_status() sees it, as it + does for errors found by fbuf_flush(). A zero-byte write + makes no progress, so it is reported as EIO. */ + fbuf->status = retval < 0 ? -retval : EIO; + return n_written ? n_written : -fbuf->status; + } + if (fbuf->offset >= 0) + fbuf->offset += retval; + /* Count only the bytes that were really written; the loop + retries whatever a short write left behind. */ + chunk = retval; + } + data += chunk; + size -= chunk; + n_written += chunk; + } + else + { + int error = fbuf_flush (fbuf); + if (error) + return n_written ? n_written : -error; + + /* Use fbuf_seek() to switch between reading and writing. */ + assert (!fbuf->read_head); + + if (!fbuf->write_tail) + { + fbuf->write_tail = fbuf->write_head = fbuf->buffer; + fbuf->write_end = fbuf->buffer + FBUF_SIZE; + } + } + } + return n_written; +} + +int +fbuf_getc__ (struct fbuf *fbuf) +{ + uint8_t c; + int retval = fbuf_read (fbuf, &c, 1); + return retval == 1 ? c : EOF; +} + +/* Attempts to read SIZE bytes of data from FBUF into DATA. On success, + returns the number of bytes actually read (possibly less than SIZE), and on + failure returns a negative errno value. Returns 0 only if end of file was + reached before any data could be read. + + If the last I/O operation on FBUF was a write, the caller must call + fbuf_seek() before this function. 
*/ +ssize_t +fbuf_read (struct fbuf *fbuf, void *data_, size_t size) +{ + uint8_t *data = data_; + size_t n_read = 0; + while (size > 0) + { + size_t avail = fbuf->read_head - fbuf->read_tail; + size_t chunk = MIN (avail, size); + if (chunk) + { + /* Copy out of buffer. */ + memcpy (data, fbuf->read_tail, chunk); + fbuf->read_tail += chunk; + data += chunk; + size -= chunk; + n_read += chunk; + } + else + { + /* Buffer is empty. */ + + /* Use fbuf_seek() to switch between reading and writing. */ + assert (!fbuf->write_head); + + if (size < FBUF_SIZE) + { + /* Normal case: fill the buffer. */ + int retval = fbuf->class->read (fbuf, fbuf->buffer, FBUF_SIZE); + if (retval < 0) + { + fbuf->status = -retval; + return n_read ? n_read : retval; + } + else if (retval == 0) + return n_read; + if (fbuf->offset >= 0) + fbuf->offset += retval; + fbuf->read_tail = fbuf->buffer; + fbuf->read_head = fbuf->buffer + retval; + } + else + { + /* Caller's read buffer is bigger than FBUF_SIZE. Use it + directly. */ + int retval = fbuf->class->read (fbuf, data, MIN (INT_MAX, size)); /* Clamp the request: the read hook reports its byte count as an int. */ + if (retval < 0) + { + fbuf->status = -retval; + return n_read ? n_read : retval; + } + else if (retval == 0) + return n_read; + if (fbuf->offset >= 0) + fbuf->offset += retval; + data += retval; + size -= retval; + n_read += retval; + } + } + } + return n_read; +} + +/* Implementation of file-based fbuf. */ + +static const struct fbuf_class fbuf_fd_class; + +/* Returns a new fbuf that represents FD. */ +struct fbuf * +fbuf_open_fd (int fd) +{ + struct fbuf_fd *fbuf = xmalloc (sizeof *fbuf); + fbuf_init (&fbuf->up, &fbuf_fd_class, -1); + fbuf->fd = fd; + return &fbuf->up; +} + +/* Opens FILENAME with FLAGS and MODE and stores a new fbuf that represents it + into *FBUFP. Returns 0 on success, or a positive errno value on failure. + On failure, *FBUFP will be NULL. 
*/
+int
+fbuf_open_file (const char *filename, int flags, mode_t mode,
+                struct fbuf **fbufp)
+{
+  int fd = open (filename, flags, mode);
+  if (fd < 0)
+    {
+      /* Capture errno first so that nothing can disturb it. */
+      int error = errno;
+      *fbufp = NULL;
+      return error;
+    }
+  *fbufp = fbuf_open_fd (fd);
+  return 0;
+}
+
+/* Converts FBUF, which must use fbuf_fd_class (this is asserted), into the
+   struct fbuf_fd that contains it. */
+static struct fbuf_fd *
+fbuf_fd_cast (const struct fbuf *fbuf)
+{
+  assert (fbuf->class == &fbuf_fd_class);
+  return UP_CAST (fbuf, struct fbuf_fd, up);
+}
+
+/* Closes the file descriptor behind FBUF_ and frees FBUF_.  Returns 0 on
+   success, otherwise the positive errno value from close(). */
+static int
+fbuf_fd_close (struct fbuf *fbuf_)
+{
+  struct fbuf_fd *fbuf = fbuf_fd_cast (fbuf_);
+
+  /* close() reports failure as -1.  That is not the same thing as the stdio
+     constant EOF, which is only guaranteed to be some negative value. */
+  int retval = close (fbuf->fd) < 0 ? errno : 0;
+  free (fbuf);
+  return retval;
+}
+
+/* Reads up to N bytes from the file descriptor into BUFFER.  Returns the
+   value returned by read() if it is nonnegative, otherwise a negative errno
+   value. */
+static int
+fbuf_fd_read (struct fbuf *fbuf_, void *buffer, size_t n)
+{
+  struct fbuf_fd *fbuf = fbuf_fd_cast (fbuf_);
+  int retval = read (fbuf->fd, buffer, n);
+  return retval >= 0 ? retval : -errno;
+}
+
+/* Writes up to N bytes from BUFFER to the file descriptor.  Returns the
+   positive number of bytes written, otherwise -errno.
+
+   NOTE(review): a write() result of 0 also takes the -errno path, where
+   errno was not set by this call.  Unlike fbuf_fd_read(), 0 is therefore
+   never returned as a byte count.  Confirm against the caller's flush loop
+   whether that is intended before changing it. */
+static int
+fbuf_fd_write (struct fbuf *fbuf_, const void *buffer, size_t n)
+{
+  struct fbuf_fd *fbuf = fbuf_fd_cast (fbuf_);
+  int retval = write (fbuf->fd, buffer, n);
+  return retval > 0 ? retval : -errno;
+}
+
+/* Moves the file descriptor to absolute position OFFSET.  Returns 0 on
+   success, otherwise a positive errno value. */
+static int
+fbuf_fd_seek (struct fbuf *fbuf_, off_t offset)
+{
+  struct fbuf_fd *fbuf = fbuf_fd_cast (fbuf_);
+  return lseek (fbuf->fd, offset, SEEK_SET) < 0 ? errno : 0;
+}
+
+/* Returns the file descriptor's current position, or a negative errno value
+   on failure. */
+static off_t
+fbuf_fd_tell (struct fbuf *fbuf_)
+{
+  struct fbuf_fd *fbuf = fbuf_fd_cast (fbuf_);
+  off_t offset = lseek (fbuf->fd, 0, SEEK_CUR);
+  return offset >= 0 ? offset : -errno;
+}
+
+/* Returns the size of the underlying file, or a negative errno value on
+   failure.
+
+   The size is found by seeking to the end of the file.  The position that
+   was current on entry is saved first and restored afterward, so that a
+   size query does not move the descriptor's file position. */
+static off_t
+fbuf_fd_get_size (struct fbuf *fbuf_)
+{
+  struct fbuf_fd *fbuf = fbuf_fd_cast (fbuf_);
+
+  off_t saved = lseek (fbuf->fd, 0, SEEK_CUR);
+  if (saved < 0)
+    return -errno;
+
+  off_t size = lseek (fbuf->fd, 0, SEEK_END);
+  if (size < 0)
+    return -errno;
+
+  if (lseek (fbuf->fd, saved, SEEK_SET) < 0)
+    return -errno;
+
+  return size;
+}
+
+/* Operations for fd-based fbufs.  The initializers are positional, so they
+   have to stay in the member order of struct fbuf_class. */
+static const struct fbuf_class fbuf_fd_class =
+  {
+    fbuf_fd_close,
+    fbuf_fd_read,
+    fbuf_fd_write,
+    fbuf_fd_seek,
+    fbuf_fd_tell,
+    fbuf_fd_get_size,
+  };
+
+/* An fbuf backed by a block of memory. */
+struct fbuf_memory
+  {
+    struct fbuf up;
+    uint8_t *data;              /* Backing memory, freed on close. */
+    size_t size, allocated;     /* Bytes of content; bytes of capacity. */
+  };
+
+static const struct fbuf_class fbuf_memory_class;
+
+/* Takes ownership of the N bytes of data at DATA, which must have been
+   allocated with malloc(), as a memory buffer and makes it the backing for the
+   newly returned fbuf.  Initially, the fbuf is positioned at the beginning of
+   the data, so that reads will read from it and writes will overwrite it.  (To
+   append, use fbuf_seek() to seek to the end.)
+
+   Writes beyond the end will reallocate the buffer.  Closing the returned fbuf
+   will free the buffer. */
+struct fbuf *
+fbuf_open_memory (void *data, size_t n)
+{
+  struct fbuf_memory *fbuf = xmalloc (sizeof *fbuf);
+  fbuf_init (&fbuf->up, &fbuf_memory_class, 0);
+  fbuf->data = data;
+  fbuf->size = n;
+  fbuf->allocated = n;
+  return &fbuf->up;
+}
+
+/* Converts FBUF, which must use fbuf_memory_class (this is asserted), into
+   the struct fbuf_memory that contains it. */
+static struct fbuf_memory *
+fbuf_memory_cast (const struct fbuf *fbuf)
+{
+  assert (fbuf->class == &fbuf_memory_class);
+  return UP_CAST (fbuf, struct fbuf_memory, up);
+}
+
+/* Frees the backing memory and FBUF_ itself.  Always returns 0. */
+static int
+fbuf_memory_close (struct fbuf *fbuf_)
+{
+  struct fbuf_memory *fbuf = fbuf_memory_cast (fbuf_);
+  free (fbuf->data);
+  free (fbuf);
+  return 0;
+}
+
+/* Copies up to N bytes, starting at the fbuf's current offset, into BUFFER.
+   Returns the number of bytes copied, which is 0 if the offset is at or
+   beyond the end of the content.  The offset itself is not modified here. */
+static int
+fbuf_memory_read (struct fbuf *fbuf_, void *buffer, size_t n)
+{
+  struct fbuf_memory *fbuf = fbuf_memory_cast (fbuf_);
+  if (fbuf->up.offset >= fbuf->size)
+    return 0;
+
+  size_t chunk = MIN (n, fbuf->size - fbuf->up.offset);
+  memcpy (buffer, fbuf->data + fbuf->up.offset, chunk);
+  return chunk;
+}
+
+/* Copies the N bytes in BUFFER into the memory block at the fbuf's current
+   offset, growing the block as necessary.  Returns N on success, or -EFBIG
+   if the block would become too large.  The offset itself is not modified
+   here. */
+static int
+fbuf_memory_write (struct fbuf *fbuf_, const void *buffer, size_t n)
+{
+  struct fbuf_memory *fbuf = fbuf_memory_cast (fbuf_);
+
+  /* Fail if write would cause the memory block to exceed SIZE_MAX bytes. */
+  size_t end = xsum (fbuf->up.offset, n);
+  if (size_overflow_p (end))
+    return -EFBIG;
+
+  /* Expand fbuf->data if necessary to hold the write.  The capacity is
+     doubled, unless doubling would overflow, to limit reallocations. */
+  if (end > fbuf->allocated)
+    {
+      fbuf->allocated = end < SIZE_MAX / 2 ? end * 2 : end;
+      fbuf->data = xrealloc (fbuf->data, fbuf->allocated);
+    }
+
+  /* Zero-pad to reach the current offset (although this is necessary only if
+     there has been a seek past the end), then copy in the new data. */
+  if (fbuf->up.offset > fbuf->size)
+    memset (fbuf->data + fbuf->size, 0, fbuf->up.offset - fbuf->size);
+  memcpy (fbuf->data + fbuf->up.offset, buffer, n);
+
+  if (end > fbuf->size)
+    fbuf->size = end;
+
+  return n;
+}
+
+/* Accepts any OFFSET and returns 0.  There is no position to update here:
+   the read and write callbacks take their position from the fbuf's own
+   'offset' member. */
+static int
+fbuf_memory_seek (struct fbuf *fbuf UNUSED, off_t offset UNUSED)
+{
+  return 0;
+}
+
+/* Must never be called.  NOTE(review): presumably unreachable because a
+   memory fbuf starts out with a known offset of 0 -- confirm in the generic
+   fbuf_tell(). */
+static off_t
+fbuf_memory_tell (struct fbuf *fbuf UNUSED)
+{
+  NOT_REACHED ();
+}
+
+/* Returns the number of bytes of content in the memory block. */
+static off_t
+fbuf_memory_get_size (struct fbuf *fbuf_)
+{
+  struct fbuf_memory *fbuf = fbuf_memory_cast (fbuf_);
+  return fbuf->size;
+}
+
+/* Operations for memory-based fbufs.  The initializers are positional, so
+   they have to stay in the member order of struct fbuf_class. */
+static const struct fbuf_class fbuf_memory_class =
+  {
+    fbuf_memory_close,
+    fbuf_memory_read,
+    fbuf_memory_write,
+    fbuf_memory_seek,
+    fbuf_memory_tell,
+    fbuf_memory_get_size,
+  };
diff --git a/src/libpspp/fbuf.h b/src/libpspp/fbuf.h
new file mode 100644
index 0000000000..94a56a2945
--- /dev/null
+++ b/src/libpspp/fbuf.h
@@ -0,0 +1,140 @@
+/* PSPP - a program for statistical analysis.
+   Copyright (C) 2017 Free Software Foundation, Inc.
+
+   This program is free software: you can redistribute it and/or modify
+   it under the terms of the GNU General Public License as published by
+   the Free Software Foundation, either version 3 of the License, or
+   (at your option) any later version.
+
+   This program is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+   GNU General Public License for more details.
+
+   You should have received a copy of the GNU General Public License
+   along with this program.  If not, see <http://www.gnu.org/licenses/>. */
+
+#ifndef LIBPSPP_FBUF_H
+#define LIBPSPP_FBUF_H 1
+
+#include <stddef.h>
+#include <stdint.h>
+#include <sys/types.h>
+#include "compiler.h"
+
+/* Data structure for buffered file I/O.
+
+   fbuf is much like stdio and serves the same purpose.  The main difference is
+   that it is defined entirely in PSPP and thus it is possible to extend it in
+   a portable way, in particular to have a portable way to read and write a
+   memory buffer instead of a file descriptor, since GNU and BSD libc provide
+   ways to do that, but other libcs don't. */
+
+struct fbuf;
+
+struct fbuf *fbuf_open_fd (int fd);
+int fbuf_open_file (const char *filename, int flags, mode_t mode,
+                    struct fbuf **) WARN_UNUSED_RESULT;
+struct fbuf *fbuf_open_memory (void *, size_t);
+
+int fbuf_close (struct fbuf *);
+
+int fbuf_get_status (const struct fbuf *);
+void fbuf_clear_status (struct fbuf *);
+
+int fbuf_is_seekable (const struct fbuf *);
+off_t fbuf_get_size (const struct fbuf *);
+
+int fbuf_flush (struct fbuf *);
+
+off_t fbuf_tell (const struct fbuf *);
+int fbuf_seek (struct fbuf *, off_t);
+
+static inline int fbuf_putc (struct fbuf *, uint8_t);
+ssize_t fbuf_write (struct fbuf *, const void *, size_t);
+
+static inline int fbuf_getc (struct fbuf *);
+ssize_t fbuf_read (struct fbuf *, void *, size_t);
+
+/* Implementation details. */
+
+struct fbuf
+  {
+    const struct fbuf_class *class;
+    uint8_t *buffer;
+
+    /* Offset in the underlying file descriptor:
+
+       - In read mode, this is the offset of 'read_head'.
+
+       - In write mode, this is the offset of 'write_tail'.
+
+       Starts out at TYPE_MINIMUM (off_t), which indicates that the underlying
+       descriptor offset is not known.  Negative errno values indicate
+       errors. */
+    off_t offset;
+
+    /*
+       In read mode, buffered data is read into the start of 'buffer'.
+       Initially 'read_tail' points to 'buffer' and 'read_head' just past the
+       last byte of buffered data.  As the client reads data, 'read_tail'
+       advances until it reaches 'read_head', then the buffer is re-filled and
+       the process repeats.
+
+                                     offset in fd
+                                           |
+                                           v
+       +----------+------------------------+
+       |          | ...data to be read...  |
+       +-----------------------------------+
+       ^          ^                        ^
+       |          |                        |
+    buffer    read_tail                read_head
+
+       In write mode, read_tail and read_head are both NULL. */
+    uint8_t *read_tail, *read_head;
+
+    /*
+       In write mode, write_tail and write_head initially point to 'buffer' and
+       'write_end' to 'buffer + FBUF_SIZE'.  As the client writes, its data is
+       copied to and advances 'write_head', limited by 'write_end'.  As the
+       fbuf flushes data to the fd, 'write_tail' advances, and when
+       'write_tail' catches 'write_head', both reset to 'buffer'.
+
+            offset in fd
+                  |
+                  v
+       +----------+------------------------+-------------------+
+       |          |...data to be flushed...|                   |
+       +-----------------------------------+-------------------+
+       ^          ^                        ^                   ^
+       |          |                        |                   |
+    buffer   write_tail               write_head           write_end
+
+       In read mode, write_tail, write_head, and write_end are all NULL. */
+    uint8_t *write_tail, *write_head, *write_end;
+
+    /* Positive errno value recorded by a failed operation; fbuf_read()
+       stores one here when a read fails. */
+    int status;
+  };
+
+/* Appends BYTE to FBUF.  When the write buffer has room, the byte is stored
+   directly and 0 is returned; otherwise the byte is handed to fbuf_write()
+   and that function's result is returned.
+
+   NOTE(review): the two paths may not report success with the same value (0
+   here versus whatever fbuf_write() returns for one byte) -- confirm that
+   callers only test for a negative result. */
+static inline int
+fbuf_putc (struct fbuf *fbuf, uint8_t byte)
+{
+  if (fbuf->write_head < fbuf->write_end)
+    {
+      *fbuf->write_head++ = byte;
+      return 0;
+    }
+  else
+    return fbuf_write (fbuf, &byte, 1);
+}
+
+/* Slow path used by fbuf_getc() when no buffered byte is available. */
+int fbuf_getc__ (struct fbuf *);
+
+/* Returns the next byte from FBUF's read buffer and advances past it.  When
+   the read buffer holds no unread data, returns the result of fbuf_getc__()
+   instead. */
+static inline int
+fbuf_getc (struct fbuf *fbuf)
+{
+  return (fbuf->read_tail < fbuf->read_head
+          ? *fbuf->read_tail++
+          : fbuf_getc__ (fbuf));
+}
+
+#endif /* libpspp/fbuf.h */
diff --git a/src/output/ascii.c b/src/output/ascii.c
index d7ec317d70..5a4c8c7e56 100644
--- a/src/output/ascii.c
+++ b/src/output/ascii.c
@@ -1100,7 +1100,7 @@ ascii_open_page (struct ascii_driver *a)
 
   if (a->file == NULL)
     {
-      a->file = fn_open (a->handle, a->append ? "a" : "w");
+      a->file = fn_fopen (a->handle, a->append ?
"a" : "w"); if (a->file != NULL) { if ( isatty (fileno (a->file))) diff --git a/src/output/csv.c b/src/output/csv.c index 455584914e..2802f4bbef 100644 --- a/src/output/csv.c +++ b/src/output/csv.c @@ -92,7 +92,7 @@ csv_create (struct file_handle *fh, enum settings_output_devices device_type, csv->titles = parse_boolean (opt (d, o, "titles", "true")); csv->captions = parse_boolean (opt (d, o, "captions", "true")); csv->handle = fh; - csv->file = fn_open (fh, "w"); + csv->file = fn_fopen (fh, "w"); csv->n_items = 0; if (csv->file == NULL) diff --git a/src/output/html.c b/src/output/html.c index 38b89719a4..618db2795c 100644 --- a/src/output/html.c +++ b/src/output/html.c @@ -109,7 +109,7 @@ html_create (struct file_handle *fh, enum settings_output_devices device_type, parse_color (d, o, "background-color", "#FFFFFFFFFFFF", &html->bg); parse_color (d, o, "foreground-color", "#000000000000", &html->fg); #endif - html->file = fn_open (html->handle, "w"); + html->file = fn_fopen (html->handle, "w"); if (html->file == NULL) { msg_error (errno, _("error opening output file `%s'"), fh_get_file_name (html->handle)); diff --git a/src/output/msglog.c b/src/output/msglog.c index 0d03295710..dbbff3ee10 100644 --- a/src/output/msglog.c +++ b/src/output/msglog.c @@ -63,7 +63,7 @@ msglog_create (const char *file_name) struct file_handle *handle = fh_create_file (NULL, file_name, NULL, fh_default_properties ()); - file = fn_open (handle, "w"); + file = fn_fopen (handle, "w"); if (file == NULL) { msg_error (errno, _("error opening output file `%s'"), file_name); diff --git a/utilities/pspp-convert.c b/utilities/pspp-convert.c index f21e5bdb8b..ad91ef9da2 100644 --- a/utilities/pspp-convert.c +++ b/utilities/pspp-convert.c @@ -262,7 +262,7 @@ decrypt_file (struct encrypted_file *enc, if (!encrypted_file_unlock (enc, password)) error (1, 0, _("sorry, wrong password")); - out = fn_open (ofh, "wb"); + out = fn_fopen (ofh, "wb"); if (out == NULL) error (1, errno, ("%s: error opening 
output file"), output_filename);