/* PSPP - a program for statistical analysis.
- Copyright (C) 1997-2004, 2006 Free Software Foundation, Inc.
+ Copyright (C) 1997-2004, 2006, 2010, 2011, 2012, 2013 Free Software Foundation, Inc.
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
#include <config.h>
-#include <language/data-io/data-writer.h>
+#include "language/data-io/data-writer.h"
#include <assert.h>
#include <errno.h>
#include <stdlib.h>
#include <sys/stat.h>
-#include <data/file-name.h>
-#include <data/make-file.h>
-#include <language/data-io/file-handle.h>
-#include <libpspp/assertion.h>
-#include <libpspp/integer-format.h>
-#include <libpspp/message.h>
-#include <libpspp/str.h>
+#include "data/file-name.h"
+#include "data/make-file.h"
+#include "language/data-io/file-handle.h"
+#include "libpspp/assertion.h"
+#include "libpspp/encoding-guesser.h"
+#include "libpspp/integer-format.h"
+#include "libpspp/i18n.h"
+#include "libpspp/message.h"
+#include "libpspp/str.h"
-#include "minmax.h"
-#include "xalloc.h"
+#include "gl/minmax.h"
+#include "gl/xalloc.h"
#include "gettext.h"
#define _(msgid) gettext (msgid)
struct fh_lock *lock; /* Exclusive access to file. */
FILE *file; /* Associated file. */
struct replace_file *rf; /* Atomic file replacement support. */
+ char *encoding; /* Encoding. */
+ enum fh_line_ends line_ends; /* Line ends for text files. */
+
+ int unit; /* Unit width, in bytes. */
+ char cr[MAX_UNIT]; /* \r in encoding, 'unit' bytes long. */
+ char lf[MAX_UNIT]; /* \n in encoding, 'unit' bytes long. */
+ char spaces[32]; /* 32 bytes worth of ' ' in encoding. */
};
-/* Opens a file handle for writing as a data file. */
+/* Opens a file handle for writing as a data file.
+
+ The encoding of the file written is by default that of FH itself. If
+ ENCODING is nonnull, then it overrides the default encoding.
+
+ *However*: ENCODING directly affects only text strings written by the data
+ writer code itself, that is, new-lines in FH_MODE_TEXT and space padding in
+ FH_MODE_FIXED mode. The client must do its own encoding translation for the
+ data that it writes. (This is unavoidable because sometimes the data
+ written includes binary data that reencoding would mangle.) The client can
+ obtain the encoding to re-encode into with dfm_writer_get_encoding().
+
+ The writer stores its own copy of the resolved encoding name, so the caller
+ keeps ownership of ENCODING. The encoded forms of carriage return,
+ new-line, and space are also cached in the writer here, 'unit' bytes per
+ character. */
struct dfm_writer *
-dfm_open_writer (struct file_handle *fh)
+dfm_open_writer (struct file_handle *fh, const char *encoding)
{
+ struct encoding_info ei;
struct dfm_writer *w;
struct fh_lock *lock;
+ int ofs;
lock = fh_lock (fh, FH_REF_FILE, N_("data file"), FH_ACC_WRITE, false);
if (lock == NULL)
if (w != NULL)
return w;
+ encoding = encoding_guess_parse_encoding (encoding != NULL
+ ? encoding
+ : fh_get_encoding (fh)); /* Explicit ENCODING wins; otherwise FH's own. */
+ get_encoding_info (&ei, encoding);
+
w = xmalloc (sizeof *w);
w->fh = fh_ref (fh);
w->lock = lock;
- w->rf = replace_file_start (fh_get_file_name (w->fh), "wb",
- (S_IRUSR | S_IWUSR | S_IRGRP | S_IWGRP
- | S_IROTH | S_IWOTH), &w->file, NULL);
+ w->rf = replace_file_start (w->fh, "wb", 0666, &w->file);
+ w->encoding = xstrdup (encoding);
+ w->line_ends = fh_get_line_ends (fh);
+ w->unit = ei.unit;
+ memcpy (w->cr, ei.cr, sizeof w->cr); /* NOTE(review): copies MAX_UNIT bytes; assumes ei.cr and ei.lf are at least that large -- confirm. */
+ memcpy (w->lf, ei.lf, sizeof w->lf);
+ for (ofs = 0; ofs + ei.unit <= sizeof w->spaces; ofs += ei.unit) /* Fill with whole encoded spaces only. */
+ memcpy (&w->spaces[ofs], ei.space, ei.unit);
+
if (w->rf == NULL)
{
- msg (ME, _("An error occurred while opening \"%s\" for writing "
+ msg (ME, _("An error occurred while opening `%s' for writing "
"as a data file: %s."),
fh_get_file_name (w->fh), strerror (errno));
dfm_close_writer (w);
return w;
}
-/* Returns false if an I/O error occurred on WRITER, true otherwise. */
+/* Returns true if an I/O error occurred on WRITER, false otherwise. */
bool
dfm_write_error (const struct dfm_writer *writer)
{
{
case FH_MODE_TEXT:
fwrite (rec, len, 1, w->file);
- putc ('\n', w->file);
+ if (w->line_ends == FH_END_CRLF)
+ fwrite (w->cr, w->unit, 1, w->file);
+ fwrite (w->lf, w->unit, 1, w->file);
break;
case FH_MODE_FIXED:
fwrite (rec, write_bytes, 1, w->file);
while (pad_bytes > 0)
{
- static const char spaces[32] = " ";
- size_t chunk = MIN (pad_bytes, sizeof spaces);
- fwrite (spaces, chunk, 1, w->file);
+ size_t chunk = MIN (pad_bytes, sizeof w->spaces);
+ fwrite (w->spaces, chunk, 1, w->file);
pad_bytes -= chunk;
}
}
ok = !dfm_write_error (w) && !fn_close (file_name, w->file);
if (!ok)
- msg (ME, _("I/O error occurred writing data file \"%s\"."), file_name);
+ msg (ME, _("I/O error occurred writing data file `%s'."), file_name);
if (ok ? !replace_file_commit (w->rf) : !replace_file_abort (w->rf))
ok = false;
}
fh_unref (w->fh);
+ free (w->encoding);
free (w);
return ok;
}
-/* Returns the legacy character encoding of data written to WRITER. */
+/* Returns the encoding of data written to WRITER.
+
+ The string returned is WRITER's own copy of the encoding name, which is
+ freed together with the writer. The caller must not free it and must not
+ use it after WRITER has been closed. */
const char *
-dfm_writer_get_legacy_encoding (const struct dfm_writer *writer)
+dfm_writer_get_encoding (const struct dfm_writer *writer)
{
- return fh_get_legacy_encoding (writer->fh);
+ return writer->encoding;
}