- New "-a", "-l", "--password-list" options to search for an
encrypted file's password.
- - New "--labels" and "--recode" options for CSV output.
+ - New "--keep" and "--drop" options to output only selected variables.
+
+ - New "--recode", "--no-var-names", "--labels", "--print-formats",
+ "--decimal", "--delimiter", and "--qualifier" options to control
+ CSV output.
* Improvements to SAVE DATA COLLECTION support for MDD files.
options to specify how to search for it, or @option{--password-list}
to specify a file of passwords to try.
-Use @code{-O format=@var{format}} to override the inferred format or to
+Use @code{-O @var{format}} to override the inferred format or to
specify the format for unrecognized extensions.
-@command{pspp-convert} accepts the following options:
+@command{pspp-convert} accepts the following general options:
@table @option
-@item @option{-O format=@var{format}}
-Overrides the format inferred from the output file's extension. Use
-@option{--help} to list the available formats. @xref{Invoking PSPP},
-for details of the available output formats.
-
-@item @option{-O @var{option}=@var{value}}
-Sets an option for the output file format. @xref{Invoking PSPP}, for
-details of the available output options.
+@item @option{-O @var{format}}
+@itemx @option{--output-format=@var{format}}
+Sets the output format, where @var{format} is one of the extensions
+listed above, e.g.: @option{-O csv}. Use @option{--help} to list
+the supported output formats.
@item -c @var{maxcases}
@itemx --cases=@var{maxcases}
interpreted. This option is necessary because old SPSS system files,
and SPSS/PC+ system files, do not self-identify their encoding.
-@item --labels
-By default, @command{pspp-convert} writes variables' values to CVS
-output files. With this option, @command{pspp-convert} writes value
-labels.
+@item -k @var{variable}@dots{}
+@itemx --keep=@var{variable}@dots{}
+By default, @command{pspp-convert} includes all the variables from the
+input file. Use this option to list specific variables to include;
+any variables not listed will be dropped. The variables in the output
+file will also be reordered into the given order. The variable list
+may use @code{TO} in the same way as in PSPP syntax, e.g.@: if the
+dictionary contains consecutive variables @code{a}, @code{b},
+@code{c}, and @code{d}, then @option{--keep='a to d'} will include all
+of them (and no others).
+
+@item -d @var{variable}@dots{}
+@itemx --drop=@var{variable}@dots{}
+Drops the specified variables from the output.
+
+When @option{--keep} and @option{--drop} are used together,
+@option{--keep} is processed first.
+
+@item -h
+@itemx --help
+Prints a usage message on stdout and exits.
+@item -v
+@itemx --version
+Prints version information on stdout and exits.
+@end table
+
+The following options affect CSV output:
+
+@table @option
@item --recode
-By default, @command{pspp-convert} writes user-missing values to CVS
+By default, @command{pspp-convert} writes user-missing values to CSV
output files as their regular values. With this option,
@command{pspp-convert} recodes them to system-missing values (which
are written as a single space).
+@item --no-var-names
+By default, @command{pspp-convert} writes the variable names as the
+first line of output. With this option, @command{pspp-convert} omits
+this line.
+
+@item --labels
+By default, @command{pspp-convert} writes variables' values to CSV
+output files. With this option, @command{pspp-convert} writes value
+labels.
+
+@item --print-formats
+By default, @command{pspp-convert} writes numeric variables as plain
+numbers. This option makes @command{pspp-convert} honor variables'
+print formats.
+
+@item --decimal=@var{decimal}
+This option sets the character used as a decimal point in output. The
+default is @samp{.}.
+
+@item --delimiter=@var{delimiter}
+This option sets the character used to separate fields in output. The
+default is @samp{,}, unless the decimal point is @samp{,}, in which
+case @samp{;} is used.
+
+@item --qualifier=@var{qualifier}
+This option sets the character used to quote fields that contain the
+delimiter. The default is @samp{"}.
+@end table
+
+The following options specify how to obtain the password for encrypted
+files:
+
+@table @option
@item -p @var{password}
@item --password=@var{password}
Specifies the password to use to decrypt an encrypted SPSS system file
@item --password-list=@var{file}
Specifies a file to read containing a list of passwords to try, one
per line. If @var{file} is @file{-}, reads from stdin.
-
-@item -h
-@itemx --help
-Prints a usage message on stdout and exits.
-
-@item -v
-@itemx --version
-Prints version information on stdout and exits.
@end table
static bool write_error (const struct csv_writer *);
static bool close_writer (struct csv_writer *);
-/* Initializes OPTS with default options for writing a CSV file. */
-void
-csv_writer_options_init (struct csv_writer_options *opts)
-{
- opts->recode_user_missing = false;
- opts->include_var_names = false;
- opts->use_value_labels = false;
- opts->use_print_formats = false;
- opts->decimal = settings_get_decimal_char (FMT_F);
- opts->delimiter = ',';
- opts->qualifier = '"';
-}
-
/* Opens the CSV file designated by file handle FH for writing cases from
dictionary DICT according to the given OPTS.
char qualifier; /* Quote character. */
};
-void csv_writer_options_init (struct csv_writer_options *);
-
struct file_handle;
struct dictionary;
struct casewriter *csv_writer_open (struct file_handle *,
struct casewriter *writer;
struct file_handle *handle;
- struct csv_writer_options csv_opts;
-
bool replace;
bool retain_unselected;
dict_delete_scratch_vars (dict);
dict_compact_values (dict);
- csv_opts.recode_user_missing = recode_user_missing;
- csv_opts.include_var_names = include_var_names;
- csv_opts.use_value_labels = use_value_labels;
- csv_opts.use_print_formats = use_print_formats;
- csv_opts.decimal = decimal;
- csv_opts.delimiter = (delimiter ? delimiter
- : type == TAB_FILE ? '\t'
- : decimal == '.' ? ','
- : ';');
- csv_opts.qualifier = qualifier;
-
+ struct csv_writer_options csv_opts = {
+ .recode_user_missing = recode_user_missing,
+ .include_var_names = include_var_names,
+ .use_value_labels = use_value_labels,
+ .use_print_formats = use_print_formats,
+ .decimal = decimal,
+ .delimiter = (delimiter ? delimiter
+ : type == TAB_FILE ? '\t'
+ : decimal == '.' ? ','
+ : ';'),
+ .qualifier = qualifier,
+ };
writer = csv_writer_open (handle, dict, &csv_opts);
if (writer == NULL)
goto error;
dist_man_MANS += utilities/pspp-convert.1
utilities_pspp_convert_SOURCES = utilities/pspp-convert.c
utilities_pspp_convert_CPPFLAGS = $(AM_CPPFLAGS) -DINSTALLDIR=\"$(bindir)\"
-utilities_pspp_convert_LDADD = src/libpspp-core.la
+utilities_pspp_convert_LDADD = src/libpspp.la src/libpspp-core.la $(CAIRO_LIBS)
utilities_pspp_convert_LDFLAGS = $(PSPP_LDFLAGS) $(PG_LDFLAGS)
if RELOCATABLE_VIA_LD
interpreted. This option is necessary because old SPSS system files
do not self-identify their encoding.
.
+.IP "\fB\-k \fIvar\fR..."
+.IQ "\fB\-\-keep=\fIvar\fR..."
+Drops all variables except those listed as \fIvar\fR, and reorders the
+remaining variables into the specified order.
+.
+.IP "\fB\-d \fIvar\fR..."
+.IQ "\fB\-\-drop=\fIvar\fR..."
+Drops each \fIvar\fR listed from the output.
+.
.SS "CSV Output Options"
.PP
These options affect only output to \fB.csv\fR and \fB.txt\fR files.
.IP "\fB\-\-labels\fR"
By default, \fBpspp\-convert\fR writes variables' values to the output.
With this option, \fBpspp\-convert\fR writes value labels.
+.IP "\fB\-\-no\-var\-names\fR"
+By default, \fBpspp\-convert\fR writes the variable names as the
+first line of output. With this option, \fBpspp\-convert\fR omits
+this line.
.IP "\fB\-\-recode\fR"
By default, \fBpspp\-convert\fR writes user-missing values as their
regular values. With this option, \fBpspp\-convert\fR recodes them to
system-missing values (which are written as a single space).
+
+.IP "\fB\-\-print\-formats\fR"
+By default, \fBpspp\-convert\fR writes numeric variables as plain
+numbers. This option makes \fBpspp\-convert\fR honor variables'
+print formats.
+
+.IP "\fB\-\-decimal=\fIdecimal\fR"
+This option sets the character used as a decimal point in output. The
+default is a period (\fB.\fR).
+
+.IP "\fB\-\-delimiter=\fIdelimiter\fR"
+This option sets the character used to separate fields in output. The
+default is a comma (\fB,\fR), unless the decimal point is a comma, in
+which case a semicolon (\fB;\fR) is used.
+
+.IP "\fB\-\-qualifier=\fIqualifier\fR"
+This option sets the character used to quote fields that contain the
+delimiter. The default is a double quote (\fB\(dq\fR).
.
.SS "Password Options"
When the input file is encrypted, \fBpspp\-convert\fR needs to obtain
a password to decrypt it. To do so, the user may specify the password
-with \f\-p\fR (or \fB\-\-password), or the name of a file containing a
+with \fB\-p\fR (or \fB\-\-password\fR), or the name of a file containing a
list of possible passwords with \fB\-\-password\-list\fR, or an
alphabet of possible passwords to try along with a maximum length with
\fB\-a\fR (or \fB\-\-password\-alphabet\fR) and \fB\-l\fR (or
#include "data/settings.h"
#include "data/sys-file-writer.h"
#include "data/file-handle-def.h"
+#include "language/command.h"
+#include "language/lexer/lexer.h"
+#include "language/lexer/variable-parser.h"
#include "libpspp/assertion.h"
#include "libpspp/cast.h"
#include "libpspp/i18n.h"
#include "gl/error.h"
#include "gl/getpass.h"
+#include "gl/localcharset.h"
#include "gl/progname.h"
#include "gl/version-etc.h"
const char *alphabet, int max_length,
const char *password_list);
+/* Parses ARG, the argument supplied for command-line option OPTION_NAME,
+   as a single character and stores that character in *OUT.
+
+   ARG must be exactly one byte long; otherwise this reports "OPTION_NAME
+   argument must be a single character" through error() with a status of 1
+   (which is expected to terminate the program -- confirm against gnulib
+   error()). */
+static void
+parse_character_option (const char *arg, const char *option_name, char *out)
+{
+ if (strlen (arg) != 1)
+ {
+ /* XXX support multibyte characters */
+ error (1, 0, _("%s argument must be a single character"), option_name);
+ }
+ *out = arg[0];
+}
+
+/* Parses ARG as a list of variables in DICT by feeding it to a temporary
+   lexer and calling parse_variables().
+
+   Returns true if the list parsed and nothing but end-of-input (T_STOP or
+   T_ENDCMD) follows it; the results are then in *VARS and *N_VARS, and the
+   callers in this file free *VARS when done with it.
+
+   Returns false otherwise, after freeing *VARS and resetting *VARS to NULL
+   and *N_VARS to 0. */
+static bool
+parse_variables_option (const char *arg, struct dictionary *dict,
+ struct variable ***vars, size_t *n_vars)
+{
+ /* Lex ARG as a string in the locale's character set. */
+ struct lexer *lexer = lex_create ();
+ lex_append (lexer, lex_reader_for_string (arg, locale_charset ()));
+ lex_get (lexer);
+
+ bool ok = parse_variables (lexer, dict, vars, n_vars, 0);
+ /* Reject any leftover tokens after the variable list. */
+ if (ok && (lex_token (lexer) != T_STOP && lex_token (lexer) != T_ENDCMD))
+ {
+ lex_error (lexer, _("expecting variable name"));
+ ok = false;
+ }
+
+ lex_destroy (lexer);
+ if (!ok)
+ {
+ /* Leave the outputs in a well-defined empty state on failure. */
+ free (*vars);
+ *vars = NULL;
+ *n_vars = 0;
+ }
+ return ok;
+}
+
int
main (int argc, char *argv[])
{
const char *output_filename;
long long int max_cases = LLONG_MAX;
+ const char *keep = NULL;
+ const char *drop = NULL;
struct dictionary *dict = NULL;
- struct casereader *reader;
+ struct casereader *reader = NULL;
struct file_handle *input_fh = NULL;
const char *encoding = NULL;
struct encrypted_file *enc;
const char *password_list = NULL;
int length = 0;
- bool recode_user_missing = false;
- bool use_value_labels = false;
+ struct csv_writer_options csv_opts = {
+ .include_var_names = true,
+ .decimal = '.',
+ .delimiter = 0, /* The default will be set later. */
+ .qualifier = '"',
+ };
long long int i;
enum
{
OPT_PASSWORD_LIST = UCHAR_MAX + 1,
- OPT_LABELS,
OPT_RECODE,
+ OPT_NO_VAR_NAMES,
+ OPT_LABELS,
+ OPT_PRINT_FORMATS,
+ OPT_DECIMAL,
+ OPT_DELIMITER,
+ OPT_QUALIFIER,
};
static const struct option long_options[] =
{
- { "cases", required_argument, NULL, 'c' },
+ { "cases", required_argument, NULL, 'c' },
+ { "keep", required_argument, NULL, 'k' },
+ { "drop", required_argument, NULL, 'd' },
{ "encoding", required_argument, NULL, 'e' },
- { "labels", no_argument, NULL, OPT_LABELS },
{ "recode", no_argument, NULL, OPT_RECODE },
+ { "no-var-names", no_argument, NULL, OPT_NO_VAR_NAMES },
+ { "labels", no_argument, NULL, OPT_LABELS },
+ { "print-formats", no_argument, NULL, OPT_PRINT_FORMATS },
+ { "decimal", required_argument, NULL, OPT_DECIMAL },
+ { "delimiter", required_argument, NULL, OPT_DELIMITER },
+ { "qualifier", required_argument, NULL, OPT_QUALIFIER },
{ "password", required_argument, NULL, 'p' },
{ "password-alphabet", required_argument, NULL, 'a' },
int c;
- c = getopt_long (argc, argv, "c:e:p:a:l:O:hv", long_options, NULL);
+ c = getopt_long (argc, argv, "c:k:d:e:p:a:l:O:hv", long_options, NULL);
if (c == -1)
break;
max_cases = strtoull (optarg, NULL, 0);
break;
+ case 'k':
+ keep = optarg;
+ break;
+
+ case 'd':
+ drop = optarg;
+ break;
+
case 'e':
encoding = optarg;
break;
password_list = optarg;
break;
+ case OPT_RECODE:
+ csv_opts.recode_user_missing = true;
+ break;
+
+ case OPT_NO_VAR_NAMES:
+ csv_opts.include_var_names = false;
+ break;
+
case OPT_LABELS:
- use_value_labels = true;
+ csv_opts.use_value_labels = true;
break;
- case OPT_RECODE:
- recode_user_missing = true;
+ case OPT_PRINT_FORMATS:
+ /* --print-formats is registered in long_options and documented, so it
+ must actually turn on print-format output. */
+ csv_opts.use_print_formats = true;
+ break;
+
+ case OPT_DECIMAL:
+ parse_character_option (optarg, "--decimal", &csv_opts.decimal);
+ break;
+
+ case OPT_DELIMITER:
+ parse_character_option (optarg, "--delimiter", &csv_opts.delimiter);
+ break;
+
+ case OPT_QUALIFIER:
+ parse_character_option (optarg, "--qualifier", &csv_opts.qualifier);
break;
case 'a':
if (reader == NULL)
goto error;
- if (!strcmp (output_format, "csv") || !strcmp (output_format, "txt"))
+ if (keep)
+ {
+ struct variable **keep_vars;
+ size_t n_keep_vars;
+ if (!parse_variables_option (keep, dict, &keep_vars, &n_keep_vars))
+ goto error;
+ dict_reorder_vars (dict, keep_vars, n_keep_vars);
+ dict_delete_consecutive_vars (dict, n_keep_vars,
+ dict_get_var_cnt (dict) - n_keep_vars);
+ free (keep_vars);
+ }
+
+ if (drop)
{
- struct csv_writer_options options;
+ struct variable **drop_vars;
+ size_t n_drop_vars;
+ if (!parse_variables_option (drop, dict, &drop_vars, &n_drop_vars))
+ goto error;
+ dict_delete_vars (dict, drop_vars, n_drop_vars);
+ free (drop_vars);
+ }
- csv_writer_options_init (&options);
- options.include_var_names = true;
- options.use_value_labels = use_value_labels;
- options.recode_user_missing = recode_user_missing;
- writer = csv_writer_open (output_fh, dict, &options);
+ if (!strcmp (output_format, "csv") || !strcmp (output_format, "txt"))
+ {
+ if (!csv_opts.delimiter)
+ csv_opts.delimiter = csv_opts.decimal == '.' ? ',' : ';';
+ writer = csv_writer_open (output_fh, dict, &csv_opts);
}
else if (!strcmp (output_format, "sav") || !strcmp (output_format, "sys"))
{
return 0;
error:
+ casereader_destroy (reader);
ds_destroy (&alphabet);
dict_unref (dict);
fh_unref (output_fh);
is one of the extensions listed above\n\
-e, --encoding=CHARSET override encoding of input data file\n\
-c MAXCASES limit number of cases to copy (default is all cases)\n\
+ -k, --keep=VAR... include only the given variables in output\n\
+ -d, --drop=VAR... drop the given variables from output\n\
CSV output options:\n\
- --labels write value labels to output\n\
--recode convert user-missing values to system-missing\n\
+ --no-var-names do not include variable names as first row\n\
+ --labels write value labels to output\n\
+ --print-formats honor variables' print formats\n\
+ --decimal=CHAR use CHAR as the decimal point (default: .)\n\
+ --delimiter=CHAR use CHAR to separate fields (default: ,)\n\
+ --qualifier=CHAR use CHAR to quote the delimiter (default: \")\n\
Password options (for used with encrypted files):\n\
-p PASSWORD individual password\n\
-a ALPHABET with -l, alphabet of passwords to try\n\