From fcb75da3200f19842a2eb12ca00063a727a226fd Mon Sep 17 00:00:00 2001 From: Ben Pfaff Date: Sun, 8 Aug 2010 13:50:49 -0700 Subject: [PATCH] Implement SAVE TRANSLATE to comma- and tab-delimited text formats. --- NEWS | 5 +- doc/files.texi | 135 +++++++ src/data/automake.mk | 2 + src/data/csv-file-writer.c | 493 +++++++++++++++++++++++ src/data/csv-file-writer.h | 44 ++ src/language/command.def | 2 +- src/language/data-io/automake.mk | 1 + src/language/data-io/save-translate.c | 288 +++++++++++++ tests/automake.mk | 1 + tests/language/data-io/save-translate.at | 92 +++++ 10 files changed, 1061 insertions(+), 2 deletions(-) create mode 100644 src/data/csv-file-writer.c create mode 100644 src/data/csv-file-writer.h create mode 100644 src/language/data-io/save-translate.c create mode 100644 tests/language/data-io/save-translate.at diff --git a/NEWS b/NEWS index d0b60630d5..a6192187aa 100644 --- a/NEWS +++ b/NEWS @@ -1,5 +1,5 @@ PSPP NEWS -- history of user-visible changes. -Time-stamp: <2010-05-21 16:40:53 blp> +Time-stamp: <2010-08-04 22:26:05 blp> Copyright (C) 1996-9, 2000, 2008, 2009, 2010 Free Software Foundation, Inc. See the end for copying conditions. @@ -9,6 +9,9 @@ Changes from 0.7.3 to 0.7.5: * The MRSETS command is now implemented. + * SAVE TRANSLATE is now implemented, with initial support for saving + data in comma-separated value and tab-delimited formats. + Changes from 0.7.2 to 0.7.3: * Charts are now produced with Cairo and Pango, instead of libplot. diff --git a/doc/files.texi b/doc/files.texi index 1323749057..707447e099 100644 --- a/doc/files.texi +++ b/doc/files.texi @@ -11,6 +11,7 @@ portable files. * GET DATA:: Read from foreign files. * IMPORT:: Read from a portable file. * SAVE:: Write to a system file. +* SAVE TRANSLATE:: Write data in foreign file formats. * SYSFILE INFO:: Display system file dictionary. * XEXPORT:: Write to a portable file, as a transformation. * XSAVE:: Write to a system file, as a transformation. 
@@ -720,6 +721,140 @@ The NAMES and MAP subcommands are currently ignored. @cmd{SAVE} causes the data to be read. It is a procedure. +@node SAVE TRANSLATE +@section SAVE TRANSLATE +@vindex SAVE TRANSLATE + +@display +SAVE TRANSLATE + /OUTFILE=@{'file-name',file_handle@} + /TYPE=@{CSV,TAB@} + [/REPLACE] + [/MISSING=@{IGNORE,RECODE@}] + + [/DROP=var_list] + [/KEEP=var_list] + [/RENAME=(src_names=target_names)@dots{}] + [/UNSELECTED=@{RETAIN,DELETE@}] + [/MAP] + + @dots{}additional subcommands depending on TYPE@dots{} +@end display + +The @cmd{SAVE TRANSLATE} command is used to save data into various +formats understood by other applications. + +The OUTFILE and TYPE subcommands are mandatory. OUTFILE specifies the +file to be written, as a string file name or a file handle +(@pxref{File Handles}). TYPE determines the type of the file to +write. It must be one of the following: + +@table @asis +@item CSV +Comma-separated value format. + +@item TAB +Tab-delimited format. +@end table + +By default, SAVE TRANSLATE will not overwrite an existing file. Use +REPLACE to force an existing file to be overwritten. + +With MISSING=IGNORE, the default, SAVE TRANSLATE treats user-missing +values as if they were not missing. Specify MISSING=RECODE to output +numeric user-missing values like system-missing values and string +user-missing values as all spaces. + +By default, all the variables in the active file dictionary are saved +to the output file, but DROP or KEEP can select a subset of variables +to save. The RENAME subcommand can also be used to change the names +under which variables are saved. UNSELECTED determines whether cases +filtered out by the FILTER command are written to the output file. +These subcommands have the same syntax and meaning as on the +@cmd{SAVE} command (@pxref{SAVE}). + +Each supported file type has additional subcommands, explained in +separate sections below. + +@cmd{SAVE TRANSLATE} causes the data to be read. It is a procedure. 
+ +@menu +* SAVE TRANSLATE /TYPE=CSV and TYPE=TAB:: +@end menu + +@node SAVE TRANSLATE /TYPE=CSV and TYPE=TAB +@subsection Writing Comma- and Tab-Separated Data Files + +@display +SAVE TRANSLATE + /OUTFILE=@{'file-name',file_handle@} + /TYPE=@{CSV,TAB@} + [/REPLACE] + [/MISSING=@{IGNORE,RECODE@}] + + [/DROP=var_list] + [/KEEP=var_list] + [/RENAME=(src_names=target_names)@dots{}] + [/UNSELECTED=@{RETAIN,DELETE@}] + + [/FIELDNAMES] + [/CELLS=@{VALUES,LABELS@}] + [/TEXTOPTIONS DELIMITER='delimiter'] + [/TEXTOPTIONS QUALIFIER='qualifier'] + [/TEXTOPTIONS DECIMAL=@{DOT,COMMA@}] + [/TEXTOPTIONS FORMAT=@{PLAIN,VARIABLE@}] +@end display + +The SAVE TRANSLATE command with TYPE=CSV or TYPE=TAB writes data in a +comma- or tab-separated value format similar to that described by +RFC@tie{}4180. Each variable becomes one output column, and each case +becomes one line of output. If FIELDNAMES is specified, an additional +line at the top of the output file lists variable names. + +The CELLS and TEXTOPTIONS FORMAT settings determine how values are +written to the output file: + +@table @asis +@item CELLS=VALUES FORMAT=PLAIN (the default settings) +Writes variables to the output in ``plain'' formats that ignore the +details of variable formats. Numeric values are written as plain +decimal numbers with enough digits to indicate their exact values in +machine representation. Numeric values include @samp{e} followed by +an exponent if the exponent value would be less than -4 or greater +than 15. Dates are written in MM/DD/YYYY format and times in HH:MM:SS +format. WKDAY and MONTH values are written as decimal numbers. + +Numeric values use, by default, the decimal point character set with +SET DECIMAL (@pxref{SET DECIMAL}). Use DECIMAL=DOT or DECIMAL=COMMA +to force a particular decimal point character. + +@item CELLS=VALUES FORMAT=VARIABLE +Writes variables using their print formats. 
Leading and trailing +spaces are removed from numeric values, and trailing spaces are +removed from string values. + +@item CELLS=LABELS FORMAT=PLAIN +@itemx CELLS=LABELS FORMAT=VARIABLE +Writes value labels where they exist, and otherwise writes the values +themselves as described above. +@end table + +Regardless of CELLS and TEXTOPTIONS FORMAT, numeric system-missing +values are output as a single space. + +For TYPE=TAB, tab characters delimit values. For TYPE=CSV, the +TEXTOPTIONS DELIMITER and DECIMAL settings determine the character +that separates values within a line. If DELIMITER is specified, then +the specified string separates values. If DELIMITER is not specified, +then the default is a comma with DECIMAL=DOT or a semicolon with +DECIMAL=COMMA. If DECIMAL is not given either, it is implied by the +decimal point character set with SET DECIMAL (@pxref{SET DECIMAL}). + +The TEXTOPTIONS QUALIFIER setting specifies a character that is output +before and after a value that contains the delimiter character or the +qualifier character. The default is a double quote (@samp{"}). A +qualifier character that appears within a value is doubled. + @node SYSFILE INFO @section SYSFILE INFO @vindex SYSFILE INFO diff --git a/src/data/automake.mk b/src/data/automake.mk index 19403cf7f3..10fbed3d9e 100644 --- a/src/data/automake.mk +++ b/src/data/automake.mk @@ -42,6 +42,8 @@ src_data_libdata_la_SOURCES = \ src/data/case.h \ src/data/case-tmpfile.c \ src/data/case-tmpfile.h \ + src/data/csv-file-writer.c \ + src/data/csv-file-writer.h \ src/data/data-in.c \ src/data/data-in.h \ src/data/data-out.c \ diff --git a/src/data/csv-file-writer.c b/src/data/csv-file-writer.c new file mode 100644 index 0000000000..7d35b1c2e0 --- /dev/null +++ b/src/data/csv-file-writer.c @@ -0,0 +1,493 @@ +/* PSPP - a program for statistical analysis. + Copyright (C) 2010 Free Software Foundation, Inc. 
+ + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see . */ + +#include + +#include "data/csv-file-writer.h" + +#include +#include +#include +#include +#include +#include +#include + +#include "data/calendar.h" +#include "data/case.h" +#include "data/casewriter-provider.h" +#include "data/casewriter.h" +#include "data/data-out.h" +#include "data/dictionary.h" +#include "data/file-handle-def.h" +#include "data/file-name.h" +#include "data/format.h" +#include "data/make-file.h" +#include "data/missing-values.h" +#include "data/settings.h" +#include "data/value-labels.h" +#include "data/variable.h" +#include "libpspp/assertion.h" +#include "libpspp/i18n.h" +#include "libpspp/message.h" +#include "libpspp/str.h" + +#include "gl/unlocked-io.h" +#include "gl/xalloc.h" + +#include "gettext.h" +#define _(msgid) gettext (msgid) +#define N_(msgid) (msgid) + +/* A variable in a CSV file. */ +struct csv_var + { + int width; /* Variable width (0 to 32767). */ + int case_index; /* Index into case. */ + struct fmt_spec format; /* Print format. */ + struct missing_values missing; /* User-missing values, if recoding. */ + struct val_labs *val_labs; /* Value labels, if any and they are in use. */ + }; + +/* Comma-separated value (CSV) file writer. */ +struct csv_writer + { + struct file_handle *fh; /* File handle. */ + struct fh_lock *lock; /* Mutual exclusion for file. */ + FILE *file; /* File stream. 
*/ + struct replace_file *rf; /* Ticket for replacing output file. */ + + struct csv_writer_options opts; + + char *encoding; /* Encoding used by variables. */ + + /* Variables. */ + struct csv_var *csv_vars; /* Variables. */ + size_t n_csv_vars; /* Number of variables. */ + }; + +static const struct casewriter_class csv_file_casewriter_class; + +static void write_var_names (struct csv_writer *, const struct dictionary *); + +static bool write_error (const struct csv_writer *); +static bool close_writer (struct csv_writer *); + +/* Initializes OPTS with default options for writing a CSV file. */ +void +csv_writer_options_init (struct csv_writer_options *opts) +{ + opts->recode_user_missing = false; + opts->include_var_names = false; + opts->use_value_labels = false; + opts->use_print_formats = false; + opts->decimal = settings_get_decimal_char (FMT_F); + opts->delimiter = 0; + opts->qualifier = '"'; +} + +/* Opens the CSV file designated by file handle FH for writing cases from + dictionary DICT according to the given OPTS. Returns the new casewriter + if successful, otherwise a null pointer. + No reference to DICT is retained, so it may be modified or + destroyed at will after this function returns. */ +struct casewriter * +csv_writer_open (struct file_handle *fh, const struct dictionary *dict, + const struct csv_writer_options *opts) +{ + struct csv_writer *w; + size_t i; + + /* Create and initialize writer. */ + w = xmalloc (sizeof *w); + w->fh = fh_ref (fh); + w->lock = NULL; + w->file = NULL; + w->rf = NULL; + + w->opts = *opts; + + w->encoding = (dict_get_encoding (dict) + ? 
xstrdup (dict_get_encoding (dict)) + : NULL); + + w->n_csv_vars = dict_get_var_cnt (dict); + w->csv_vars = xnmalloc (w->n_csv_vars, sizeof *w->csv_vars); + for (i = 0; i < w->n_csv_vars; i++) + { + const struct variable *var = dict_get_var (dict, i); + struct csv_var *cv = &w->csv_vars[i]; + + cv->width = var_get_width (var); + cv->case_index = var_get_case_index (var); + + cv->format = *var_get_print_format (var); + if (opts->recode_user_missing) + mv_copy (&cv->missing, var_get_missing_values (var)); + else + mv_init (&cv->missing, cv->width); + + if (opts->use_value_labels) + cv->val_labs = val_labs_clone (var_get_value_labels (var)); + else + cv->val_labs = NULL; + } + + /* Open file handle as an exclusive writer. */ + /* TRANSLATORS: this fragment will be interpolated into messages in fh_lock() + that identify types of files. */ + w->lock = fh_lock (fh, FH_REF_FILE, N_("CSV file"), FH_ACC_WRITE, true); + if (w->lock == NULL) + goto error; + + /* Create the file on disk. */ + w->rf = replace_file_start (fh_get_file_name (fh), "w", 0666, + &w->file, NULL); + if (w->rf == NULL) + { + msg (ME, _("Error opening \"%s\" for writing as a CSV file: %s."), + fh_get_file_name (fh), strerror (errno)); + goto error; + } + + if (opts->include_var_names) + write_var_names (w, dict); + + if (write_error (w)) + goto error; + + return casewriter_create (dict_get_proto (dict), + &csv_file_casewriter_class, w); + +error: + close_writer (w); + return NULL; +} + +static bool +csv_field_needs_quoting (struct csv_writer *w, const char *s, size_t len) +{ + const char *p; + + for (p = s; p < &s[len]; p++) + if (*p == w->opts.qualifier || *p == w->opts.delimiter + || *p == '\n' || *p == '\r') + return true; + + return false; +} + +static void +csv_output_buffer (struct csv_writer *w, const char *s, size_t len) +{ + if (csv_field_needs_quoting (w, s, len)) + { + const char *p; + + putc (w->opts.qualifier, w->file); + for (p = s; p < &s[len]; p++) + { + if (*p == w->opts.qualifier) + 
putc (w->opts.qualifier, w->file); + putc (*p, w->file); + } + putc (w->opts.qualifier, w->file); + } + else + fwrite (s, 1, len, w->file); +} + +static void +csv_output_string (struct csv_writer *w, const char *s) +{ + csv_output_buffer (w, s, strlen (s)); +} + +static void +write_var_names (struct csv_writer *w, const struct dictionary *d) +{ + size_t i; + + for (i = 0; i < w->n_csv_vars; i++) + { + if (i > 0) + putc (w->opts.delimiter, w->file); + csv_output_string (w, var_get_name (dict_get_var (d, i))); + } + putc ('\n', w->file); +} + +static void +csv_output_format (struct csv_writer *w, const struct csv_var *cv, + const union value *value) +{ + char *s = data_out (value, w->encoding, &cv->format); + struct substring ss = ss_cstr (s); + if (cv->format.type != FMT_A) + ss_trim (&ss, ss_cstr (" ")); + else + ss_rtrim (&ss, ss_cstr (" ")); + csv_output_buffer (w, ss.string, ss.length); + free (s); +} + +static double +extract_date (double number, int *y, int *m, int *d) +{ + int yd; + + calendar_offset_to_gregorian (number / 60. / 60. / 24., y, m, d, &yd); + return fmod (number, 60. * 60. * 24.); +} + +static void +extract_time (double number, double *H, int *M, int *S) +{ + *H = floor (number / 60. / 60.); + number = fmod (number, 60. 
* 60.); + + *M = floor (number / 60.); + number = fmod (number, 60.); + + *S = floor (number); +} + +static void +csv_write_var__ (struct csv_writer *w, const struct csv_var *cv, + const union value *value) +{ + const char *label; + + label = val_labs_find (cv->val_labs, value); + if (label != NULL) + csv_output_string (w, label); + else if (cv->width == 0 && value->f == SYSMIS) + csv_output_buffer (w, " ", 1); + else if (w->opts.use_print_formats) + csv_output_format (w, cv, value); + else + { + char s[128]; + + switch (cv->format.type) + { + case FMT_F: + case FMT_COMMA: + case FMT_DOT: + case FMT_DOLLAR: + case FMT_PCT: + case FMT_E: + case FMT_CCA: + case FMT_CCB: + case FMT_CCC: + case FMT_CCD: + case FMT_CCE: + case FMT_N: + case FMT_Z: + case FMT_P: + case FMT_PK: + case FMT_IB: + case FMT_PIB: + case FMT_PIBHEX: + case FMT_RB: + case FMT_RBHEX: + case FMT_WKDAY: + case FMT_MONTH: + snprintf (s, sizeof s, "%.*g", DBL_DIG + 1, value->f); + if (w->opts.decimal != '.') + { + char *cp = strchr (s, '.'); + if (cp != NULL) + *cp = w->opts.decimal; + } + break; + + case FMT_DATE: + case FMT_ADATE: + case FMT_EDATE: + case FMT_JDATE: + case FMT_SDATE: + case FMT_QYR: + case FMT_MOYR: + case FMT_WKYR: + if (value->f < 0) + strcpy (s, " "); + else + { + int y, m, d; + + extract_date (value->f, &y, &m, &d); + snprintf (s, sizeof s, "%02d/%02d/%04d", m, d, y); + } + break; + + case FMT_DATETIME: + if (value->f < 0) + strcpy (s, " "); + else + { + int y, m, d, M, S; + double H; + + extract_time (extract_date (value->f, &y, &m, &d), &H, &M, &S); + snprintf (s, sizeof s, "%02d/%02d/%04d %02.0f:%02d:%02d", + m, d, y, H, M, S); + } + break; + + case FMT_TIME: + case FMT_DTIME: + { + double H; + int M, S; + + extract_time (fabs (value->f), &H, &M, &S); + snprintf (s, sizeof s, "%s%02.0f:%02d:%02d", + value->f < 0 ? 
"-" : "", H, M, S); + } + break; + + case FMT_A: + case FMT_AHEX: + csv_output_format (w, cv, value); + return; + + case FMT_NUMBER_OF_FORMATS: + NOT_REACHED (); + } + csv_output_string (w, s); + } +} + +static void +csv_write_var (struct csv_writer *w, const struct csv_var *cv, + const union value *value) +{ + if (mv_is_value_missing (&cv->missing, value, MV_USER)) + { + union value missing; + + value_init (&missing, cv->width); + value_set_missing (&missing, cv->width); + csv_write_var__ (w, cv, &missing); + value_destroy (&missing, cv->width); + } + else + csv_write_var__ (w, cv, value); +} + +static void +csv_write_case (struct csv_writer *w, const struct ccase *c) +{ + size_t i; + + for (i = 0; i < w->n_csv_vars; i++) + { + const struct csv_var *cv = &w->csv_vars[i]; + + if (i > 0) + putc (w->opts.delimiter, w->file); + csv_write_var (w, cv, case_data_idx (c, cv->case_index)); + } + putc ('\n', w->file); +} + +/* Writes case C to CSV file W. */ +static void +csv_file_casewriter_write (struct casewriter *writer, void *w_, + struct ccase *c) +{ + struct csv_writer *w = w_; + + if (ferror (w->file)) + { + casewriter_force_error (writer); + case_unref (c); + return; + } + + csv_write_case (w, c); + case_unref (c); +} + +/* Destroys CSV file writer W. */ +static void +csv_file_casewriter_destroy (struct casewriter *writer, void *w_) +{ + struct csv_writer *w = w_; + if (!close_writer (w)) + casewriter_force_error (writer); +} + +/* Returns true if an I/O error has occurred on WRITER, false otherwise. */ +bool +write_error (const struct csv_writer *writer) +{ + return ferror (writer->file); +} + +/* Closes a CSV file after we're done with it. + Returns true if successful, false if an I/O error occurred. 
*/ +bool +close_writer (struct csv_writer *w) +{ + size_t i; + bool ok; + + if (w == NULL) + return true; + + ok = true; + if (w->file != NULL) + { + if (write_error (w)) + ok = false; + if (fclose (w->file) == EOF) + ok = false; + + if (!ok) + msg (ME, _("An I/O error occurred writing CSV file \"%s\"."), + fh_get_file_name (w->fh)); + + if (ok ? !replace_file_commit (w->rf) : !replace_file_abort (w->rf)) + ok = false; + } + + fh_unlock (w->lock); + fh_unref (w->fh); + + free (w->encoding); + + for (i = 0; i < w->n_csv_vars; i++) + { + struct csv_var *cv = &w->csv_vars[i]; + mv_destroy (&cv->missing); + val_labs_destroy (cv->val_labs); + } + + free (w->csv_vars); + free (w); + + return ok; +} + +/* CSV file writer casewriter class. */ +static const struct casewriter_class csv_file_casewriter_class = + { + csv_file_casewriter_write, + csv_file_casewriter_destroy, + NULL, + }; diff --git a/src/data/csv-file-writer.h b/src/data/csv-file-writer.h new file mode 100644 index 0000000000..65723bc288 --- /dev/null +++ b/src/data/csv-file-writer.h @@ -0,0 +1,44 @@ +/* PSPP - a program for statistical analysis. + Copyright (C) 2010 Free Software Foundation, Inc. + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see . */ + +#ifndef CSV_FILE_WRITER_H +#define CSV_FILE_WRITER_H 1 + +#include + +/* Writing comma-separated value (CSV) files. */ + +/* Options for creating CSV files. 
*/ +struct csv_writer_options + { + bool recode_user_missing; /* Recode user-missing to system-missing? */ + bool include_var_names; /* Add header row with variable names? */ + bool use_value_labels; /* Write value labels where available? */ + bool use_print_formats; /* Honor variables' print formats? */ + char decimal; /* Decimal point character. */ + char delimiter; /* Field separator. */ + char qualifier; /* Quote character. */ + }; + +void csv_writer_options_init (struct csv_writer_options *); + +struct file_handle; +struct dictionary; +struct casewriter *csv_writer_open (struct file_handle *, + const struct dictionary *, + const struct csv_writer_options *); + +#endif /* csv-file-writer.h */ diff --git a/src/language/command.def b/src/language/command.def index 6096e759ca..174b80248e 100644 --- a/src/language/command.def +++ b/src/language/command.def @@ -123,6 +123,7 @@ DEF_CMD (S_DATA, 0, "RENAME VARIABLES", cmd_rename_variables) DEF_CMD (S_DATA, 0, "ROC", cmd_roc) DEF_CMD (S_DATA, 0, "SAMPLE", cmd_sample) DEF_CMD (S_DATA, 0, "SAVE", cmd_save) +DEF_CMD (S_DATA, 0, "SAVE TRANSLATE", cmd_save_translate) DEF_CMD (S_DATA, 0, "SORT CASES", cmd_sort_cases) DEF_CMD (S_DATA, F_ABBREV, "SORT", cmd_sort_cases) DEF_CMD (S_DATA, 0, "T-TEST", cmd_t_test) @@ -238,7 +239,6 @@ UNIMPL_CMD ("REPEATING DATA", "Specify multiple cases per input record") UNIMPL_CMD ("REPORT", "Pretty print working file") UNIMPL_CMD ("RESTORE", "Restore settings") UNIMPL_CMD ("RMV", "Replace missing values") -UNIMPL_CMD ("SAVE TRANSLATE", "Save to foriegn format") UNIMPL_CMD ("SCRIPT", "Run script file") UNIMPL_CMD ("SEASON", "Estimate seasonal factors") UNIMPL_CMD ("SELECTPRED", "Select predictor variables") diff --git a/src/language/data-io/automake.mk b/src/language/data-io/automake.mk index ca003d1d76..63c2c96a34 100644 --- a/src/language/data-io/automake.mk +++ b/src/language/data-io/automake.mk @@ -22,6 +22,7 @@ language_data_io_sources = \ src/language/data-io/placement-parser.h \ 
src/language/data-io/print-space.c \ src/language/data-io/print.c \ + src/language/data-io/save-translate.c \ src/language/data-io/save.c \ src/language/data-io/trim.c \ src/language/data-io/trim.h diff --git a/src/language/data-io/save-translate.c b/src/language/data-io/save-translate.c new file mode 100644 index 0000000000..3e33ff1125 --- /dev/null +++ b/src/language/data-io/save-translate.c @@ -0,0 +1,288 @@ +/* PSPP - a program for statistical analysis. + Copyright (C) 2010 Free Software Foundation, Inc. + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see . 
*/ + +#include + +#include + +#include "data/case-map.h" +#include "data/casereader.h" +#include "data/casewriter.h" +#include "data/csv-file-writer.h" +#include "data/dictionary.h" +#include "data/file-name.h" +#include "data/format.h" +#include "data/procedure.h" +#include "data/settings.h" +#include "language/command.h" +#include "language/data-io/file-handle.h" +#include "language/data-io/trim.h" +#include "language/lexer/lexer.h" +#include "libpspp/message.h" + +#include "xalloc.h" + +#include "gettext.h" +#define _(msgid) gettext (msgid) +#define N_(msgid) (msgid) + +int +cmd_save_translate (struct lexer *lexer, struct dataset *ds) +{ + enum { CSV_FILE = 1, TAB_FILE } type; + + struct dictionary *dict; + struct case_map *map; + struct casewriter *writer; + struct file_handle *handle; + + struct csv_writer_options csv_opts; + + bool replace; + + bool retain_unselected; + bool recode_user_missing; + bool include_var_names; + bool use_value_labels; + bool use_print_formats; + char decimal; + char delimiter; + char qualifier; + + bool ok; + + type = 0; + + dict = dict_clone (dataset_dict (ds)); + map = NULL; + + handle = NULL; + replace = false; + + retain_unselected = true; + recode_user_missing = false; + include_var_names = false; + use_value_labels = false; + use_print_formats = false; + decimal = settings_get_decimal_char (FMT_F); + delimiter = 0; + qualifier = '"'; + + case_map_prepare_dict (dict); + dict_delete_scratch_vars (dict); + + while (lex_match (lexer, '/')) + { + if (lex_match_id (lexer, "OUTFILE")) + { + if (handle != NULL) + { + lex_sbc_only_once ("OUTFILE"); + goto error; + } + + lex_match (lexer, '='); + + handle = fh_parse (lexer, FH_REF_FILE); + if (handle == NULL) + goto error; + } + else if (lex_match_id (lexer, "TYPE")) + { + if (type != 0) + { + lex_sbc_only_once ("TYPE"); + goto error; + } + + lex_match (lexer, '='); + if (lex_match_id (lexer, "CSV")) + type = CSV_FILE; + else if (lex_match_id (lexer, "TAB")) + type = TAB_FILE; + else + 
{ + lex_error (lexer, _("expecting %s or %s"), "CSV", "TAB"); + goto error; + } + } + else if (lex_match_id (lexer, "REPLACE")) + replace = true; + else if (lex_match_id (lexer, "FIELDNAMES")) + include_var_names = true; + else if (lex_match_id (lexer, "MISSING")) + { + lex_match (lexer, '='); + if (lex_match_id (lexer, "IGNORE")) + recode_user_missing = false; + else if (lex_match_id (lexer, "RECODE")) + recode_user_missing = true; + else + { + lex_error (lexer, _("expecting %s or %s"), "IGNORE", "RECODE"); + goto error; + } + } + else if (lex_match_id (lexer, "CELLS")) + { + lex_match (lexer, '='); + if (lex_match_id (lexer, "VALUES")) + use_value_labels = false; + else if (lex_match_id (lexer, "LABELS")) + use_value_labels = true; + else + { + lex_error (lexer, _("expecting %s or %s"), "VALUES", "LABELS"); + goto error; + } + } + else if (lex_match_id (lexer, "TEXTOPTIONS")) + { + lex_match (lexer, '='); + for (;;) + { + if (lex_match_id (lexer, "DELIMITER")) + { + lex_match (lexer, '='); + if (!lex_force_string (lexer)) + goto error; + if (ds_length (lex_tokstr (lexer)) != 1) + { + msg (SE, _("The %s string must contain exactly one " + "character."), "DELIMITER"); + goto error; + } + delimiter = ds_first (lex_tokstr (lexer)); + lex_get (lexer); + } + else if (lex_match_id (lexer, "QUALIFIER")) + { + lex_match (lexer, '='); + if (!lex_force_string (lexer)) + goto error; + if (ds_length (lex_tokstr (lexer)) != 1) + { + msg (SE, _("The %s string must contain exactly one " + "character."), "QUALIFIER"); + goto error; + } + qualifier = ds_first (lex_tokstr (lexer)); + lex_get (lexer); + } + else if (lex_match_id (lexer, "DECIMAL")) + { + lex_match (lexer, '='); + if (lex_match_id (lexer, "DOT")) + decimal = '.'; + else if (lex_match_id (lexer, "COMMA")) + decimal = ','; + else + { + lex_error (lexer, _("expecting %s or %s"), + "DOT", "COMMA"); + goto error; + } + } + else if (lex_match_id (lexer, "FORMAT")) + { + lex_match (lexer, '='); + if (lex_match_id (lexer, 
"PLAIN")) + use_print_formats = false; + else if (lex_match_id (lexer, "VARIABLE")) + use_print_formats = true; + else + { + lex_error (lexer, _("expecting %s or %s"), + "PLAIN", "VARIABLE"); + goto error; + } + } + else + break; + } + } + else if (lex_match_id (lexer, "UNSELECTED")) + { + lex_match (lexer, '='); + if (lex_match_id (lexer, "RETAIN")) + retain_unselected = true; + else if (lex_match_id (lexer, "DELETE")) + retain_unselected = false; + else + { + lex_error (lexer, _("expecting %s or %s"), "RETAIN", "DELETE"); + goto error; + } + } + else if (!parse_dict_trim (lexer, dict)) + goto error; + } + if (lex_end_of_command (lexer) != CMD_SUCCESS) + goto error; + + if (type == 0) + { + lex_sbc_missing (lexer, "TYPE"); + goto error; + } + else if (handle == NULL) + { + lex_sbc_missing (lexer, "OUTFILE"); + goto error; + } + else if (!replace && fn_exists (fh_get_file_name (handle))) + { + msg (SE, _("Output file \"%s\" exists but REPLACE was not specified."), + fh_get_file_name (handle)); + goto error; + } + + dict_delete_scratch_vars (dict); + dict_compact_values (dict); + + csv_opts.recode_user_missing = recode_user_missing; + csv_opts.include_var_names = include_var_names; + csv_opts.use_value_labels = use_value_labels; + csv_opts.use_print_formats = use_print_formats; + csv_opts.decimal = decimal; + csv_opts.delimiter = (delimiter ? delimiter + : type == TAB_FILE ? '\t' + : decimal == '.' ? ',' + : ';'); + csv_opts.qualifier = qualifier; + + writer = csv_writer_open (handle, dict, &csv_opts); + if (writer == NULL) + goto error; + fh_unref (handle); + + map = case_map_from_dict (dict); + if (map != NULL) + writer = case_map_create_output_translator (map, writer); + dict_destroy (dict); + + casereader_transfer (proc_open_filtering (ds, !retain_unselected), writer); + ok = casewriter_destroy (writer); + ok = proc_commit (ds) && ok; + + return ok ? 
CMD_SUCCESS : CMD_CASCADING_FAILURE; + +error: + fh_unref (handle); + dict_destroy (dict); + case_map_destroy (map); + return CMD_FAILURE; +} diff --git a/tests/automake.mk b/tests/automake.mk index a2ceef6be9..6b523ce8b4 100644 --- a/tests/automake.mk +++ b/tests/automake.mk @@ -420,6 +420,7 @@ TESTSUITE_AT = \ tests/data/calendar.at \ tests/language/data-io/data-list.at \ tests/language/data-io/save.at \ + tests/language/data-io/save-translate.at \ tests/language/dictionary/mrsets.at \ tests/language/expressions/evaluate.at \ tests/language/stats/aggregate.at \ diff --git a/tests/language/data-io/save-translate.at b/tests/language/data-io/save-translate.at new file mode 100644 index 0000000000..408a06bfc5 --- /dev/null +++ b/tests/language/data-io/save-translate.at @@ -0,0 +1,92 @@ +AT_BANNER([SAVE TRANSLATE /TYPE=CSV]) + +m4_define([PREPARE_SAVE_TRANSLATE_CSV], [dnl +AT_KEYWORDS([SAVE TRANSLATE]) +AT_DATA([data.txt], [dnl +0 '1 9:30:05' 1/2/2003 "25/8/1995 15:30:00" "'a,b,c'",0 +, '-0 5:17' 10/31/2010 "9/4/2008 9:29:00" " xxx ",1 +1.625,'0 12:00',,,xyzzy,1 +]) +AT_DATA([save-translate.pspp], [dnl +SET DECIMAL=DOT. +DATA LIST LIST NOTABLE FILE="data.txt" + /number(F8.3) time(DTIME10) date(ADATE10) datetime(DATETIME20) string(A8) + filter(F1.0). +MISSING VALUES number(0) time('0 12:00') string('xyzzy'). +FILTER BY filter. +SAVE TRANSLATE /OUTFILE="data.csv" /TYPE=m4_if([$2], [], [CSV], [$2]) + $1. 
+]) +AT_CHECK([pspp -O format=csv save-translate.pspp], [0]) +]) + +AT_SETUP([CSV output -- defaults]) +PREPARE_SAVE_TRANSLATE_CSV +AT_CHECK([cat data.csv], [0], [dnl +0,33:30:05,01/02/2003,08/25/1995 15:30:00,"'a,b,c'",0 + ,-05:17:00,10/31/2010,04/09/2008 09:29:00, xxx,1 +1.625,12:00:00, , ,xyzzy,1 +]) +AT_CLEANUP + +AT_SETUP([CSV output -- recode missing, delete unselected]) +PREPARE_SAVE_TRANSLATE_CSV([/MISSING=RECODE /UNSELECTED=DELETE]) +AT_CHECK([cat data.csv], [0], [dnl + ,-05:17:00,10/31/2010,04/09/2008 09:29:00, xxx,1 +1.625, , , ,,1 +]) +AT_CLEANUP + +AT_SETUP([CSV output -- var names, formats]) +PREPARE_SAVE_TRANSLATE_CSV( + [/FIELDNAMES /TEXTOPTIONS FORMAT=VARIABLE /UNSELECTED=RETAIN]) +AT_CHECK([cat data.csv], [0], [dnl +number,time,date,datetime,string,filter +.000,1 09:30:05,01/02/2003,25-AUG-1995 15:30:00,"'a,b,c'",0 + ,-0 05:17,10/31/2010,09-APR-2008 09:29:00, xxx,1 +1.625,0 12:00:00, , ,xyzzy,1 +]) +AT_CLEANUP + +AT_SETUP([CSV output -- comma as decimal point]) +PREPARE_SAVE_TRANSLATE_CSV([/FIELDNAMES /TEXTOPTIONS DECIMAL=COMMA]) +AT_CHECK([cat data.csv], [0], [dnl +number;time;date;datetime;string;filter +0;33:30:05;01/02/2003;08/25/1995 15:30:00;'a,b,c';0 + ;-05:17:00;10/31/2010;04/09/2008 09:29:00; xxx;1 +1,625;12:00:00; ; ;xyzzy;1 +]) +AT_CLEANUP + +AT_SETUP([CSV output -- custom delimiter, qualifier]) +PREPARE_SAVE_TRANSLATE_CSV( + [/FIELDNAMES /TEXTOPTIONS DELIMITER=':' QUALIFIER="'"]) +AT_CHECK([cat data.csv], [0], [dnl +number:time:date:datetime:string:filter +0:'33:30:05':01/02/2003:'08/25/1995 15:30:00':'''a,b,c''':0 + :'-05:17:00':10/31/2010:'04/09/2008 09:29:00': xxx:1 +1.625:'12:00:00': : :xyzzy:1 +]) +AT_CLEANUP + +AT_SETUP([CSV output -- KEEP, RENAME]) +PREPARE_SAVE_TRANSLATE_CSV( + [/FIELDNAMES /KEEP=time string /RENAME string=name /UNSELECTED=DELETE]) +AT_CHECK([cat data.csv], [0], [dnl +time,name +-05:17:00, xxx +12:00:00,xyzzy +]) +AT_CLEANUP + +AT_BANNER([SAVE TRANSLATE /TYPE=TAB]) + +AT_SETUP([TAB output]) 
+PREPARE_SAVE_TRANSLATE_CSV([/FIELDNAMES], [TAB]) +AT_CHECK([cat data.csv], [0], [dnl +number time date datetime string filter +0 33:30:05 01/02/2003 08/25/1995 15:30:00 'a,b,c' 0 + -05:17:00 10/31/2010 04/09/2008 09:29:00 xxx 1 +1.625 12:00:00 xyzzy 1 +]) +AT_CLEANUP -- 2.30.2