1 /* PSPP - a program for statistical analysis.
2 Copyright (C) 2010, 2011, 2012, 2013, 2014 Free Software Foundation, Inc.
4 This program is free software: you can redistribute it and/or modify
5 it under the terms of the GNU General Public License as published by
6 the Free Software Foundation, either version 3 of the License, or
7 (at your option) any later version.
9 This program is distributed in the hope that it will be useful,
10 but WITHOUT ANY WARRANTY; without even the implied warranty of
11 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 GNU General Public License for more details.
14 You should have received a copy of the GNU General Public License
15 along with this program. If not, see <http://www.gnu.org/licenses/>. */
19 #include "data/csv-file-writer.h"
29 #include "data/calendar.h"
30 #include "data/case.h"
31 #include "data/casewriter-provider.h"
32 #include "data/casewriter.h"
33 #include "data/data-out.h"
34 #include "data/dictionary.h"
35 #include "data/file-handle-def.h"
36 #include "data/file-name.h"
37 #include "data/format.h"
38 #include "data/make-file.h"
39 #include "data/missing-values.h"
40 #include "data/settings.h"
41 #include "data/value-labels.h"
42 #include "data/variable.h"
43 #include "libpspp/assertion.h"
44 #include "libpspp/i18n.h"
45 #include "libpspp/message.h"
46 #include "libpspp/str.h"
48 #include "gl/ftoastr.h"
49 #include "gl/minmax.h"
50 #include "gl/unlocked-io.h"
51 #include "gl/xalloc.h"
/* Message-translation helpers.  _() expands to a gettext() call on MSGID;
   N_() expands to MSGID itself, with no function call.  In this file N_()
   wraps the string handed to fh_lock(), which the TRANSLATORS note there
   says is interpolated into messages produced by fh_lock(). */
54 #define _(msgid) gettext (msgid)
55 #define N_(msgid) (msgid)
57 /* A variable in a CSV file. */
/* NOTE(review): the line that opens this struct is not present in this
   listing; the rest of the file refers to the type as `struct csv_var'.
   csv_writer_open() fills in one element per dictionary variable, and
   close_writer() destroys the `missing' and `val_labs' members. */
/* Taken from var_get_width(); csv_write_var__() compares it against 0
   before reading the value's numeric member. */
60 int width; /* Variable width (0 to 32767). */
/* Taken from var_get_case_index(); passed to case_data_idx() when a case is
   written. */
61 int case_index; /* Index into case. */
/* Copied by value from var_get_print_format(). */
62 struct fmt_spec format; /* Print format. */
/* Filled by mv_copy() from the variable when recode_user_missing is set;
   mv_init() is also called on it in csv_writer_open() (presumably in the
   other case -- the line between the two calls is missing here). */
63 struct missing_values missing; /* User-missing values, if recoding. */
/* Set from val_labs_clone() when use_value_labels is set. */
64 struct val_labs *val_labs; /* Value labels, if any and they are in use. */
67 /* Comma-separated value (CSV) file writer. */
/* NOTE(review): the line that opens this struct is not present in this
   listing; the rest of the file refers to the type as `struct csv_writer'.
   One instance is allocated per open writer in csv_writer_open() and passed
   to casewriter_create() as the writer's auxiliary data. */
70 struct file_handle *fh; /* File handle. */
/* Result of fh_lock() with FH_ACC_WRITE. */
71 struct fh_lock *lock; /* Mutual exclusion for file. */
/* All output goes to this stream through putc() and fwrite(). */
72 FILE *file; /* File stream. */
/* Result of replace_file_start(); close_writer() commits or aborts it. */
73 struct replace_file *rf; /* Ticket for replacing output file. */
/* Options consulted while writing: delimiter, qualifier, decimal and
   use_print_formats are all read from here by the output functions. */
75 struct csv_writer_options opts;
/* Private copy (xstrdup) of the dictionary's encoding; passed to
   data_out(). */
77 char *encoding; /* Encoding used by variables. */
/* Array of n_csv_vars elements allocated with xnmalloc(). */
80 struct csv_var *csv_vars; /* Variables. */
81 size_t n_csv_vars; /* Number of variables. */
/* Forward declarations.  csv_writer_open() refers to
   csv_file_casewriter_class and write_var_names() before their definitions,
   which appear later in the file. */
84 static const struct casewriter_class csv_file_casewriter_class;
86 static void write_var_names (struct csv_writer *, const struct dictionary *);
/* Both of these are defined near the end of the file. */
88 static bool write_error (const struct csv_writer *);
89 static bool close_writer (struct csv_writer *);
91 /* Initializes OPTS with default options for writing a CSV file. */
/* The defaults are: every boolean option off, the decimal character
   reported by settings_get_decimal_char() for FMT_F, a comma as the field
   delimiter, and a double quote as the qualifier (quoting) character.
   NOTE(review): the return type and braces of this function are not present
   in this listing. */
93 csv_writer_options_init (struct csv_writer_options *opts)
95 opts->recode_user_missing = false;
96 opts->include_var_names = false;
97 opts->use_value_labels = false;
98 opts->use_print_formats = false;
/* The decimal point follows the current settings rather than a constant. */
99 opts->decimal = settings_get_decimal_char (FMT_F);
100 opts->delimiter = ',';
101 opts->qualifier = '"';
104 /* Opens the CSV file designated by file handle FH for writing cases from
105 dictionary DICT according to the given OPTS.
107 No reference to DICT is retained, so it may be modified or
108 destroyed at will after this function returns. */
/* On the path visible here the function ends by returning the result of
   casewriter_create(), with the new csv_writer as its auxiliary data.
   NOTE(review): several lines of this function (return type, braces, the
   declaration of `i', some member initializations and the failure handling)
   are not present in this listing, so the value returned on failure and the
   cleanup performed there cannot be stated from here. */
110 csv_writer_open (struct file_handle *fh, const struct dictionary *dict,
111 const struct csv_writer_options *opts)
113 struct csv_writer *w;
116 /* Create and initialize writer. */
117 w = xmalloc (sizeof *w);
/* Keep a private copy of the dictionary's encoding string. */
125 w->encoding = xstrdup (dict_get_encoding (dict));
/* Build one csv_var for each variable in the dictionary. */
127 w->n_csv_vars = dict_get_var_cnt (dict);
128 w->csv_vars = xnmalloc (w->n_csv_vars, sizeof *w->csv_vars);
129 for (i = 0; i < w->n_csv_vars; i++)
131 const struct variable *var = dict_get_var (dict, i);
132 struct csv_var *cv = &w->csv_vars[i];
134 cv->width = var_get_width (var);
135 cv->case_index = var_get_case_index (var);
/* The print format is copied by value. */
137 cv->format = *var_get_print_format (var);
/* NOTE(review): the line between mv_copy() and mv_init() is missing from
   this listing; presumably an `else', so that the variable's user-missing
   values are copied only when recoding was requested -- confirm. */
138 if (opts->recode_user_missing)
139 mv_copy (&cv->missing, var_get_missing_values (var));
141 mv_init (&cv->missing, cv->width);
/* Value labels are cloned only when OPTS asks for them.  NOTE(review): what
   val_labs is set to otherwise is not visible in this listing. */
143 if (opts->use_value_labels)
144 cv->val_labs = val_labs_clone (var_get_value_labels (var));
149 /* Open file handle as an exclusive writer. */
150 /* TRANSLATORS: this fragment will be interpolated into messages in fh_lock()
151 that identify types of files. */
152 w->lock = fh_lock (fh, FH_REF_FILE, N_("CSV file"), FH_ACC_WRITE, true);
156 /* Create the file on disk. */
/* NOTE(review): the remaining arguments of this call are on a line that is
   not present in this listing. */
157 w->rf = replace_file_start (fh_get_file_name (fh), "w", 0666,
/* NOTE(review): this message says "system file" although this writer
   produces CSV (close_writer() says "CSV file"); it looks copied from
   another writer.  Confirm and correct the message text; doing so changes a
   translatable msgid. */
161 msg (ME, _("Error opening `%s' for writing as a system file: %s."),
162 fh_get_file_name (fh), strerror (errno));
/* Optional header line holding the variable names. */
166 if (opts->include_var_names)
167 write_var_names (w, dict);
172 return casewriter_create (dict_get_proto (dict),
173 &csv_file_casewriter_class, w);
/* Scans the LEN bytes starting at S for any byte that equals W's qualifier
   or delimiter character, or that is a line feed or carriage return.

   NOTE(review): the return type, the declaration of `p' and the return
   statements are not present in this listing.  From the name and from the
   call in csv_output_buffer(), presumably returns true when such a byte is
   found and false otherwise -- confirm against the full file. */
181 csv_field_needs_quoting (struct csv_writer *w, const char *s, size_t len)
185 for (p = s; p < &s[len]; p++)
186 if (*p == w->opts.qualifier || *p == w->opts.delimiter
187 || *p == '\n' || *p == '\r')
/* Writes the LEN bytes at S to W's file as one CSV field.

   When csv_field_needs_quoting() says so, the field is written between two
   qualifier characters, and an extra qualifier is emitted for every
   qualifier byte found inside it.  The fwrite() call writes the bytes
   unchanged (presumably in the `else' branch, for fields that need no
   quoting).

   NOTE(review): the return type, braces, the declaration of `p', the
   statement controlled by the CR-LF test and the statement that writes each
   byte are not present in this listing. */
194 csv_output_buffer (struct csv_writer *w, const char *s, size_t len)
196 if (csv_field_needs_quoting (w, s, len))
200 putc (w->opts.qualifier, w->file);
201 for (p = s; p < &s[len]; p++)
203 /* We are writing the output file in text mode, so transform any
204 explicit CR-LF line breaks into LF only, to allow the C library to
205 use correct system-specific new-lines. */
/* NOTE(review): p[1] is read even when `p' is the last of the LEN bytes,
   i.e. s[len].  The callers visible in this file pass buffers that lie
   inside NUL-terminated strings (strlen() in csv_output_string(), an
   ss_cstr() substring in csv_output_format(), and a string literal), so that
   byte exists there; a caller passing an unterminated buffer would read
   past its end. */
206 if (*p == '\r' && p[1] == '\n')
/* Double any embedded qualifier. */
209 if (*p == w->opts.qualifier)
210 putc (w->opts.qualifier, w->file);
/* Closing qualifier. */
213 putc (w->opts.qualifier, w->file);
216 fwrite (s, 1, len, w->file);
/* Writes the null-terminated string S to W's file as one CSV field, by
   passing it to csv_output_buffer() together with its strlen(). */
220 csv_output_string (struct csv_writer *w, const char *s)
222 csv_output_buffer (w, s, strlen (s));
/* Writes one line to W's file holding the name of each variable in D, each
   name passed through csv_output_string(), followed by a new-line.

   The loop runs over W's variable count but looks the variables up in D, so
   D must be the dictionary W was opened with (csv_writer_open() passes that
   same dictionary).

   NOTE(review): the declaration of `i' and the line just before the
   delimiter putc() are not present in this listing; presumably the
   delimiter is written only between names, not before the first -- confirm. */
226 write_var_names (struct csv_writer *w, const struct dictionary *d)
230 for (i = 0; i < w->n_csv_vars; i++)
233 putc (w->opts.delimiter, w->file);
234 csv_output_string (w, var_get_name (dict_get_var (d, i)));
236 putc ('\n', w->file);
/* Formats VALUE with data_out(), using W's encoding and CV's print format,
   trims spaces from the result, and writes what remains as one CSV field
   through csv_output_buffer().

   NOTE(review): the line between the ss_trim() and ss_rtrim() calls is
   missing from this listing; presumably an `else', so that formats other
   than FMT_A go through ss_trim() and FMT_A goes through ss_rtrim().
   The listing also does not show where the string returned by data_out() is
   released -- confirm that it is. */
240 csv_output_format (struct csv_writer *w, const struct csv_var *cv,
241 const union value *value)
243 char *s = data_out (value, w->encoding, &cv->format);
/* `ss' views the bytes of `s'; the trimming below adjusts the view. */
244 struct substring ss = ss_cstr (s);
245 if (cv->format.type != FMT_A)
246 ss_trim (&ss, ss_cstr (" "));
248 ss_rtrim (&ss, ss_cstr (" "));
249 csv_output_buffer (w, ss.string, ss.length);
/* Splits NUMBER into a calendar date and a remainder.

   NUMBER is divided by 60 * 60 * 24 before being handed to
   calendar_offset_to_gregorian() as the day offset, i.e. it is treated as a
   count of seconds.  The year, month and day are stored through Y, M and D.
   Returns NUMBER modulo 60 * 60 * 24 (fmod), the part left over after whole
   days.

   NOTE(review): the return type and the declaration of `yd' are not present
   in this listing; `yd' receives an output that is not used in the lines
   visible here. */
254 extract_date (double number, int *y, int *m, int *d)
258 calendar_offset_to_gregorian (number / 60. / 60. / 24., y, m, d, &yd);
259 return fmod (number, 60. * 60. * 24.);
/* Breaks NUMBER, treated as a count of seconds, into hours and minutes:
   *H receives the whole number of 60 * 60 second units and *M the whole
   number of 60 second units in what is left.

   *H is a double and is not reduced modulo anything, so it may exceed 23.

   NOTE(review): the return type and the statement that stores *S are not
   present in this listing; presumably *S receives the seconds remaining
   after the last fmod() -- confirm. */
263 extract_time (double number, double *H, int *M, int *S)
265 *H = floor (number / 60. / 60.);
266 number = fmod (number, 60. * 60.);
268 *M = floor (number / 60.);
269 number = fmod (number, 60.);
/* Writes VALUE for variable CV to W's file as a single CSV field.

   The alternatives are tried in this order: the result of val_labs_find()
   is written with csv_output_string() (presumably only when a label was
   found -- the guarding `if' line is missing here); a width-0 value equal to
   SYSMIS is written as a single space; when use_print_formats is set the
   value goes through csv_output_format(); otherwise the text is built by a
   switch on the print format type and written with csv_output_string().

   NOTE(review): the `case' labels of that switch, the declarations of the
   locals (label, cp, y, m, d, H, M, S), several guarding conditions and all
   braces are not present in this listing, so which format types select
   which branch cannot be stated from here. */
275 csv_write_var__ (struct csv_writer *w, const struct csv_var *cv,
276 const union value *value)
280 label = val_labs_find (cv->val_labs, value);
282 csv_output_string (w, label);
283 else if (cv->width == 0 && value->f == SYSMIS)
284 csv_output_buffer (w, " ", 1);
285 else if (w->opts.use_print_formats)
286 csv_output_format (w, cv, value);
/* Scratch buffer for the text built below. */
289 char s[MAX (DBL_STRLEN_BOUND, 128)];
292 switch (cv->format.type)
/* Plain number: format with dtoastr(), then overwrite the first '.' or ','
   in the result with the configured decimal character.  NOTE(review): the
   check that strpbrk() found something is presumably on the missing line
   before the assignment. */
316 dtoastr (s, sizeof s, 0, 0, value->f);
317 cp = strpbrk (s, ".,");
319 *cp = w->opts.decimal;
/* Date only, written as month/day/year. */
336 extract_date (value->f, &y, &m, &d);
337 snprintf (s, sizeof s, "%02d/%02d/%04d", m, d, y);
/* Date and time: extract_date() returns the seconds left within the day,
   which extract_time() then splits.  NOTE(review): the argument list of
   this snprintf() continues on a line not present in this listing. */
349 extract_time (extract_date (value->f, &y, &m, &d), &H, &M, &S);
350 snprintf (s, sizeof s, "%02d/%02d/%04d %02.0f:%02d:%02d",
/* Time only: split the magnitude and print the sign separately. */
361 extract_time (fabs (value->f), &H, &M, &S);
362 snprintf (s, sizeof s, "%s%02.0f:%02d:%02d",
363 value->f < 0 ? "-" : "", H, M, S);
/* Some format types are delegated to csv_output_format() instead of being
   built in `s'. */
369 csv_output_format (w, cv, value);
372 case FMT_NUMBER_OF_FORMATS:
/* Emit the text assembled in `s'. */
375 csv_output_string (w, s);
/* Writes VALUE for variable CV to W's file, substituting a missing value
   when VALUE is user-missing according to CV's `missing' member.

   In that case a temporary value of CV's width is initialized, set to
   missing with value_set_missing(), written with csv_write_var__() and then
   destroyed.  The final csv_write_var__() call writes VALUE itself
   (presumably in the `else' branch -- that line, the declaration of
   `missing', the return type and the braces are not present in this
   listing). */
380 csv_write_var (struct csv_writer *w, const struct csv_var *cv,
381 const union value *value)
383 if (mv_is_value_missing (&cv->missing, value, MV_USER))
387 value_init (&missing, cv->width);
388 value_set_missing (&missing, cv->width);
389 csv_write_var__ (w, cv, &missing);
390 value_destroy (&missing, cv->width);
393 csv_write_var__ (w, cv, value);
/* Writes case C to W's file as one line: each of W's variables is written
   with csv_write_var(), using the value found at the variable's case index,
   and the line ends with a new-line.

   NOTE(review): the declaration of `i' and the line just before the
   delimiter putc() are not present in this listing; presumably the
   delimiter is written only between fields, not before the first --
   confirm. */
397 csv_write_case (struct csv_writer *w, const struct ccase *c)
401 for (i = 0; i < w->n_csv_vars; i++)
403 const struct csv_var *cv = &w->csv_vars[i];
406 putc (w->opts.delimiter, w->file);
407 csv_write_var (w, cv, case_data_idx (c, cv->case_index));
409 putc ('\n', w->file);
412 /* Writes case C to CSV file W. */
/* This is the first entry of csv_file_casewriter_class.  W_ is the
   csv_writer that csv_writer_open() passed to casewriter_create().

   If the stream already has its error indicator set, the casewriter is
   put into the error state with casewriter_force_error().

   NOTE(review): the rest of the parameter list (which declares C), the
   return type, the braces and any statements that follow
   casewriter_force_error() or csv_write_case() are not present in this
   listing, so whether the case is still written after an error, and who
   releases C, cannot be stated from here. */
414 csv_file_casewriter_write (struct casewriter *writer, void *w_,
417 struct csv_writer *w = w_;
419 if (ferror (w->file))
421 casewriter_force_error (writer);
426 csv_write_case (w, c);
430 /* Destroys CSV file writer W. */
/* This is the second entry of csv_file_casewriter_class.  It closes the
   writer with close_writer() and, if that reports failure, puts WRITER into
   the error state with casewriter_force_error(). */
432 csv_file_casewriter_destroy (struct casewriter *writer, void *w_)
434 struct csv_writer *w = w_;
435 if (!close_writer (w))
436 casewriter_force_error (writer);
439 /* Returns true if an I/O error has occurred on WRITER, false otherwise. */
/* The result is the stream's error indicator as reported by ferror(),
   converted to the bool return type declared in the prototype near the top
   of the file. */
441 write_error (const struct csv_writer *writer)
443 return ferror (writer->file);
446 /* Closes a CSV file after we're done with it.
447 Returns true if successful, false if an I/O error occurred. */
/* NOTE(review): the return type, braces, the declarations of `ok' and `i',
   the conditions guarding fclose() and msg(), the body of the
   commit/abort test, and the statements that release the remaining members
   of W are not present in this listing. */
449 close_writer (struct csv_writer *w)
/* fclose() flushes buffered output, so a failure here is a write failure. */
462 if (fclose (w->file) == EOF)
466 msg (ME, _("An I/O error occurred writing CSV file `%s'."),
467 fh_get_file_name (w->fh));
/* Commit the replacement file when everything went well, otherwise abort
   it; the test is true when whichever call was made reports failure. */
469 if (ok ? !replace_file_commit (w->rf) : !replace_file_abort (w->rf))
/* Release what csv_writer_open() set up for each variable. */
478 for (i = 0; i < w->n_csv_vars; i++)
480 struct csv_var *cv = &w->csv_vars[i];
481 mv_destroy (&cv->missing);
482 val_labs_destroy (cv->val_labs);
491 /* CSV file writer casewriter class. */
/* Definition of the table declared near the top of the file and passed to
   casewriter_create() by csv_writer_open().  The initializers are
   positional: the write callback first, then the destroy callback.
   NOTE(review): the braces and any further initializers are not present in
   this listing. */
492 static const struct casewriter_class csv_file_casewriter_class =
494 csv_file_casewriter_write,
495 csv_file_casewriter_destroy,