#include "data/settings.h"
#include "data/sys-file-writer.h"
#include "data/file-handle-def.h"
+#include "language/command.h"
+#include "language/lexer/lexer.h"
+#include "language/lexer/variable-parser.h"
#include "libpspp/assertion.h"
#include "libpspp/cast.h"
#include "libpspp/i18n.h"
#include "gl/error.h"
#include "gl/getpass.h"
+#include "gl/localcharset.h"
#include "gl/progname.h"
#include "gl/version-etc.h"
const char *alphabet, int max_length,
const char *password_list);
+/* Parses ARG, the argument supplied for command-line option OPTION_NAME, as
+   a single-byte character and stores that byte in *OUT.
+
+   If ARG is not exactly one byte long, reports an error naming OPTION_NAME
+   and exits with status 1. */
+static void
+parse_character_option (const char *arg, const char *option_name, char *out)
+{
+  /* XXX support multibyte characters */
+  if (arg[0] == '\0' || arg[1] != '\0')
+    error (1, 0, _("%s argument must be a single character"), option_name);
+
+  *out = arg[0];
+}
+
+/* Parses ARG, the text given to a variable-list option, as a list of
+   variables from DICT.  ARG is lexed using the locale's character set.
+
+   On success, returns true and stores in *VARS an array of the *N_VARS
+   variables that were named; the caller is responsible for freeing the
+   array with free().
+
+   On failure (the variable list did not parse, or something other than the
+   end of input followed it), returns false with *VARS set to NULL and
+   *N_VARS set to 0. */
+static bool
+parse_variables_option (const char *arg, struct dictionary *dict,
+                        struct variable ***vars, size_t *n_vars)
+{
+  /* Give the outputs defined values before doing anything else.  Callers
+     pass uninitialized locals and the failure path below calls
+     free (*VARS), so this function must not depend on parse_variables()
+     storing to *VARS on every one of its error returns. */
+  *vars = NULL;
+  *n_vars = 0;
+
+  struct lexer *lexer = lex_create ();
+  lex_append (lexer, lex_reader_for_string (arg, locale_charset ()));
+  lex_get (lexer);
+
+  bool ok = parse_variables (lexer, dict, vars, n_vars, 0);
+  if (ok && lex_token (lexer) != T_STOP && lex_token (lexer) != T_ENDCMD)
+    {
+      /* The list parsed, but trailing text follows it. */
+      lex_error (lexer, _("expecting variable name"));
+      ok = false;
+    }
+
+  lex_destroy (lexer);
+  if (!ok)
+    {
+      /* Discard any partial or complete result so that the caller never
+         sees an array it would have to free after a failure. */
+      free (*vars);
+      *vars = NULL;
+      *n_vars = 0;
+    }
+  return ok;
+}
+
int
main (int argc, char *argv[])
{
const char *output_filename;
long long int max_cases = LLONG_MAX;
+ const char *keep = NULL;
+ const char *drop = NULL;
struct dictionary *dict = NULL;
- struct casereader *reader;
+ struct casereader *reader = NULL;
struct file_handle *input_fh = NULL;
const char *encoding = NULL;
struct encrypted_file *enc;
const char *password_list = NULL;
int length = 0;
+ struct csv_writer_options csv_opts = {
+ .include_var_names = true,
+ .decimal = '.',
+ .delimiter = 0, /* The default will be set later. */
+ .qualifier = '"',
+ };
+
long long int i;
set_program_name (argv[0]);
enum
{
OPT_PASSWORD_LIST = UCHAR_MAX + 1,
+ OPT_RECODE,
+ OPT_NO_VAR_NAMES,
+ OPT_LABELS,
+ OPT_PRINT_FORMATS,
+ OPT_DECIMAL,
+ OPT_DELIMITER,
+ OPT_QUALIFIER,
};
static const struct option long_options[] =
{
- { "cases", required_argument, NULL, 'c' },
+ { "cases", required_argument, NULL, 'c' },
+ { "keep", required_argument, NULL, 'k' },
+ { "drop", required_argument, NULL, 'd' },
{ "encoding", required_argument, NULL, 'e' },
+ { "recode", no_argument, NULL, OPT_RECODE },
+ { "no-var-names", no_argument, NULL, OPT_NO_VAR_NAMES },
+ { "labels", no_argument, NULL, OPT_LABELS },
+ { "print-formats", no_argument, NULL, OPT_PRINT_FORMATS },
+ { "decimal", required_argument, NULL, OPT_DECIMAL },
+ { "delimiter", required_argument, NULL, OPT_DELIMITER },
+ { "qualifier", required_argument, NULL, OPT_QUALIFIER },
+
{ "password", required_argument, NULL, 'p' },
{ "password-alphabet", required_argument, NULL, 'a' },
{ "password-length", required_argument, NULL, 'l' },
int c;
- c = getopt_long (argc, argv, "c:e:p:a:l:O:hv", long_options, NULL);
+ c = getopt_long (argc, argv, "c:k:d:e:p:a:l:O:hv", long_options, NULL);
if (c == -1)
break;
max_cases = strtoull (optarg, NULL, 0);
break;
+ case 'k':
+ keep = optarg;
+ break;
+
+ case 'd':
+ drop = optarg;
+ break;
+
case 'e':
encoding = optarg;
break;
password_list = optarg;
break;
+ case OPT_RECODE:
+ csv_opts.recode_user_missing = true;
+ break;
+
+ case OPT_NO_VAR_NAMES:
+ csv_opts.include_var_names = false;
+ break;
+
+ case OPT_LABELS:
+ csv_opts.use_value_labels = true;
+ break;
+
+ case OPT_DECIMAL:
+ parse_character_option (optarg, "--decimal", &csv_opts.decimal);
+ break;
+
+ case OPT_DELIMITER:
+ parse_character_option (optarg, "--delimiter", &csv_opts.delimiter);
+ break;
+
+ case OPT_QUALIFIER:
+ parse_character_option (optarg, "--qualifier", &csv_opts.qualifier);
+ break;
+
case 'a':
- for (const char *p = optarg; *p; )
+ for (const char *p = optarg; *p;)
if (p[1] == '-' && p[2] > p[0])
{
for (int ch = p[0]; ch <= p[2]; ch++)
output_fh = fh_create_file (NULL, output_filename, NULL, fh_default_properties ());
if (encrypted_file_open (&enc, input_fh) > 0)
{
- if (encrypted_file_is_sav (enc))
- {
- if (strcmp (output_format, "sav") && strcmp (output_format, "sys"))
- error (1, 0, _("can only convert encrypted data file to sav or "
- "sys format"));
- }
- else
- {
- if (strcmp (output_format, "sps"))
- error (1, 0, _("can only convert encrypted syntax file to sps "
- "format"));
- }
-
- if (!decrypt_file (enc, input_fh, output_fh, password,
+ if (decrypt_file (enc, input_fh, output_fh, password,
ds_cstr (&alphabet), length, password_list))
+ goto exit;
+ else
goto error;
-
- goto exit;
}
if (reader == NULL)
goto error;
- if (!strcmp (output_format, "csv") || !strcmp (output_format, "txt"))
+ if (keep)
{
- struct csv_writer_options options;
+ struct variable **keep_vars;
+ size_t n_keep_vars;
+ if (!parse_variables_option (keep, dict, &keep_vars, &n_keep_vars))
+ goto error;
+ dict_reorder_vars (dict, keep_vars, n_keep_vars);
+ dict_delete_consecutive_vars (dict, n_keep_vars,
+ dict_get_n_vars (dict) - n_keep_vars);
+ free (keep_vars);
+ }
+
+ if (drop)
+ {
+ struct variable **drop_vars;
+ size_t n_drop_vars;
+ if (!parse_variables_option (drop, dict, &drop_vars, &n_drop_vars))
+ goto error;
+ dict_delete_vars (dict, drop_vars, n_drop_vars);
+ free (drop_vars);
+ }
- csv_writer_options_init (&options);
- options.include_var_names = true;
- writer = csv_writer_open (output_fh, dict, &options);
+ if (!strcmp (output_format, "csv") || !strcmp (output_format, "txt"))
+ {
+ if (!csv_opts.delimiter)
+ csv_opts.delimiter = csv_opts.decimal == '.' ? ',' : ';';
+ writer = csv_writer_open (output_fh, dict, &csv_opts);
}
else if (!strcmp (output_format, "sav") || !strcmp (output_format, "sys"))
{
return 0;
error:
+ casereader_destroy (reader);
ds_destroy (&alphabet);
dict_unref (dict);
fh_unref (output_fh);
is one of the extensions listed above\n\
-e, --encoding=CHARSET override encoding of input data file\n\
-c MAXCASES limit number of cases to copy (default is all cases)\n\
+ -k, --keep=VAR... include only the given variables in output\n\
+ -d, --drop=VAR... drop the given variables from output\n\
+CSV output options:\n\
+ --recode convert user-missing values to system-missing\n\
+ --no-var-names do not include variable names as first row\n\
+ --labels write value labels to output\n\
+ --print-formats honor variables' print formats\n\
+ --decimal=CHAR use CHAR as the decimal point (default: .)\n\
+ --delimiter=CHAR use CHAR to separate fields (default: ,)\n\
+ --qualifier=CHAR use CHAR to quote the delimiter (default: \")\n\
Password options (for used with encrypted files):\n\
-p PASSWORD individual password\n\
-a ALPHABET with -l, alphabet of passwords to try\n\