X-Git-Url: https://pintos-os.org/cgi-bin/gitweb.cgi?a=blobdiff_plain;f=utilities%2Fpspp-convert.c;h=99a254ba44972f3814afa0534b53f0fc4402cc1e;hb=refs%2Fheads%2Fctables7;hp=7eb6b556684e79e20317112be9b1a6a9ed919603;hpb=c64c9e72a7040c8b36aa8709848efc5c37b7b72e;p=pspp

diff --git a/utilities/pspp-convert.c b/utilities/pspp-convert.c
index 7eb6b55668..99a254ba44 100644
--- a/utilities/pspp-convert.c
+++ b/utilities/pspp-convert.c
@@ -33,12 +33,16 @@
 #include "data/settings.h"
 #include "data/sys-file-writer.h"
 #include "data/file-handle-def.h"
+#include "language/command.h"
+#include "language/lexer/lexer.h"
+#include "language/lexer/variable-parser.h"
 #include "libpspp/assertion.h"
 #include "libpspp/cast.h"
 #include "libpspp/i18n.h"
 
 #include "gl/error.h"
 #include "gl/getpass.h"
+#include "gl/localcharset.h"
 #include "gl/progname.h"
 #include "gl/version-etc.h"
 
@@ -51,7 +55,51 @@ static bool decrypt_file (struct encrypted_file *enc,
                           const struct file_handle *input_filename,
                           const struct file_handle *output_filename,
                           const char *password,
-                          const char *alphabet, int max_length);
+                          const char *alphabet, int max_length,
+                          const char *password_list);
+
+/* Stores in *OUT the sole byte of ARG, the argument given for command-line
+   option OPTION_NAME.  Exits with an error message unless ARG is exactly one
+   byte long. */
+static void
+parse_character_option (const char *arg, const char *option_name, char *out)
+{
+  if (strlen (arg) != 1)
+    {
+      /* XXX support multibyte characters */
+      error (1, 0, _("%s argument must be a single character"), option_name);
+    }
+  *out = arg[0];
+}
+
+/* Parses ARG as a list of variables in DICT and requires that nothing follow
+   the list.  On success, returns true with the variables in *VARS and their
+   number in *N_VARS; the caller frees *VARS.  On failure, returns false with
+   *VARS set to NULL and *N_VARS set to 0. */
+static bool
+parse_variables_option (const char *arg, struct dictionary *dict,
+                        struct variable ***vars, size_t *n_vars)
+{
+  struct lexer *lexer = lex_create ();
+  lex_append (lexer, lex_reader_for_string (arg, locale_charset ()));
+  lex_get (lexer);
+
+  bool ok = parse_variables (lexer, dict, vars, n_vars, 0);
+  if (ok && (lex_token (lexer) != T_STOP && lex_token (lexer) != T_ENDCMD))
+    {
+      lex_error (lexer, _("expecting variable name"));
+      ok = false;
+    }
+
+  lex_destroy (lexer);
+  if (!ok)
+    {
+      free (*vars);
+      *vars = NULL;
+      *n_vars = 0;
+    }
+  return ok;
+}
 
 int
 main (int argc, char *argv[])
@@ -60,8 +108,10 @@ main (int argc, char *argv[])
   const char *output_filename;
 
   long long int max_cases = LLONG_MAX;
+  const char *keep = NULL;
+  const char *drop = NULL;
   struct dictionary *dict = NULL;
-  struct casereader *reader;
+  struct casereader *reader = NULL;
   struct file_handle *input_fh = NULL;
   const char *encoding = NULL;
   struct encrypted_file *enc;
@@ -71,8 +121,16 @@ main (int argc, char *argv[])
   struct casewriter *writer;
   const char *password = NULL;
   struct string alphabet = DS_EMPTY_INITIALIZER;
+  const char *password_list = NULL;
   int length = 0;
 
+  struct csv_writer_options csv_opts = {
+    .include_var_names = true,
+    .decimal = '.',
+    .delimiter = 0,             /* The default will be set later. */
+    .qualifier = '"',
+  };
+
   long long int i;
 
   set_program_name (argv[0]);
@@ -82,14 +140,36 @@ main (int argc, char *argv[])
 
   for (;;)
     {
+      enum
+        {
+          OPT_PASSWORD_LIST = UCHAR_MAX + 1,
+          OPT_RECODE,
+          OPT_NO_VAR_NAMES,
+          OPT_LABELS,
+          OPT_PRINT_FORMATS,
+          OPT_DECIMAL,
+          OPT_DELIMITER,
+          OPT_QUALIFIER,
+        };
       static const struct option long_options[] =
         {
-          { "cases",    required_argument, NULL, 'c' },
+          { "cases", required_argument, NULL, 'c' },
+          { "keep", required_argument, NULL, 'k' },
+          { "drop", required_argument, NULL, 'd' },
           { "encoding", required_argument, NULL, 'e' },
 
+          { "recode", no_argument, NULL, OPT_RECODE },
+          { "no-var-names", no_argument, NULL, OPT_NO_VAR_NAMES },
+          { "labels", no_argument, NULL, OPT_LABELS },
+          { "print-formats", no_argument, NULL, OPT_PRINT_FORMATS },
+          { "decimal", required_argument, NULL, OPT_DECIMAL },
+          { "delimiter", required_argument, NULL, OPT_DELIMITER },
+          { "qualifier", required_argument, NULL, OPT_QUALIFIER },
+
           { "password", required_argument, NULL, 'p' },
           { "password-alphabet", required_argument, NULL, 'a' },
           { "password-length", required_argument, NULL, 'l' },
+          { "password-list", required_argument, NULL, OPT_PASSWORD_LIST },
 
           { "output-format", required_argument, NULL, 'O' },
 
@@ -100,7 +180,7 @@ main (int argc, char *argv[])
 
       int c;
 
-      c = getopt_long (argc, argv, "c:e:p:a:l:O:hv", long_options, NULL);
+      c = getopt_long (argc, argv, "c:k:d:e:p:a:l:O:hv", long_options, NULL);
       if (c == -1)
         break;
 
@@ -110,6 +190,14 @@ main (int argc, char *argv[])
           max_cases = strtoull (optarg, NULL, 0);
           break;
 
+        case 'k':
+          keep = optarg;
+          break;
+
+        case 'd':
+          drop = optarg;
+          break;
+
         case 'e':
           encoding = optarg;
           break;
@@ -122,8 +210,40 @@ main (int argc, char *argv[])
           length = atoi (optarg);
           break;
 
+        case OPT_PASSWORD_LIST:
+          password_list = optarg;
+          break;
+
+        case OPT_RECODE:
+          csv_opts.recode_user_missing = true;
+          break;
+
+        case OPT_NO_VAR_NAMES:
+          csv_opts.include_var_names = false;
+          break;
+
+        case OPT_LABELS:
+          csv_opts.use_value_labels = true;
+          break;
+
+        case OPT_PRINT_FORMATS:
+          csv_opts.use_print_formats = true;
+          break;
+
+        case OPT_DECIMAL:
+          parse_character_option (optarg, "--decimal", &csv_opts.decimal);
+          break;
+
+        case OPT_DELIMITER:
+          parse_character_option (optarg, "--delimiter", &csv_opts.delimiter);
+          break;
+
+        case OPT_QUALIFIER:
+          parse_character_option (optarg, "--qualifier", &csv_opts.qualifier);
+          break;
+
         case 'a':
-          for (const char *p = optarg; *p; )
+          for (const char *p = optarg; *p;)
             if (p[1] == '-' && p[2] > p[0])
               {
                 for (int ch = p[0]; ch <= p[2]; ch++)
@@ -173,24 +293,11 @@ main (int argc, char *argv[])
   output_fh = fh_create_file (NULL, output_filename, NULL, fh_default_properties ());
   if (encrypted_file_open (&enc, input_fh) > 0)
     {
-      if (encrypted_file_is_sav (enc))
-        {
-          if (strcmp (output_format, "sav") && strcmp (output_format, "sys"))
-            error (1, 0, _("can only convert encrypted data file to sav or "
-                           "sys format"));
-        }
+      if (decrypt_file (enc, input_fh, output_fh, password,
+                        ds_cstr (&alphabet), length, password_list))
+        goto exit;
       else
-        {
-          if (strcmp (output_format, "sps"))
-            error (1, 0, _("can only convert encrypted syntax file to sps "
-                           "format"));
-        }
-
-      if (!decrypt_file (enc, input_fh, output_fh, password,
-                         ds_cstr (&alphabet), length))
         goto error;
-
-      goto exit;
     }
 
 
@@ -198,13 +305,33 @@ main (int argc, char *argv[])
   if (reader == NULL)
     goto error;
 
-  if (!strcmp (output_format, "csv") || !strcmp (output_format, "txt"))
+  if (keep)
     {
-      struct csv_writer_options options;
+      struct variable **keep_vars;
+      size_t n_keep_vars;
+      if (!parse_variables_option (keep, dict, &keep_vars, &n_keep_vars))
+        goto error;
+      dict_reorder_vars (dict, keep_vars, n_keep_vars);
+      dict_delete_consecutive_vars (dict, n_keep_vars,
+                                    dict_get_n_vars (dict) - n_keep_vars);
+      free (keep_vars);
+    }
 
-      csv_writer_options_init (&options);
-      options.include_var_names = true;
-      writer = csv_writer_open (output_fh, dict, &options);
+  if (drop)
+    {
+      struct variable **drop_vars;
+      size_t n_drop_vars;
+      if (!parse_variables_option (drop, dict, &drop_vars, &n_drop_vars))
+        goto error;
+      dict_delete_vars (dict, drop_vars, n_drop_vars);
+      free (drop_vars);
+    }
+
+  if (!strcmp (output_format, "csv") || !strcmp (output_format, "txt"))
+    {
+      if (!csv_opts.delimiter)
+        csv_opts.delimiter = csv_opts.decimal == '.' ? ',' : ';';
+      writer = csv_writer_open (output_fh, dict, &csv_opts);
     }
   else if (!strcmp (output_format, "sav") || !strcmp (output_format, "sys"))
     {
@@ -256,6 +383,7 @@ exit:
   return 0;
 
 error:
+  casereader_destroy (reader);
   ds_destroy (&alphabet);
   dict_unref (dict);
   fh_unref (output_fh);
@@ -272,14 +400,65 @@ decrypt_file (struct encrypted_file *enc,
               const struct file_handle *ofh,
               const char *password,
               const char *alphabet,
-              int max_length)
+              int max_length,
+              const char *password_list)
 {
   FILE *out;
   int err;
   const char *input_filename = fh_get_file_name (ifh);
   const char *output_filename = fh_get_file_name (ofh);
 
-  if (alphabet[0] && max_length)
+  if (password_list)
+    {
+      FILE *password_file;
+      if (!strcmp (password_list, "-"))
+        password_file = stdin;
+      else
+        {
+          password_file = fopen (password_list, "r");
+          if (!password_file)
+            error (1, errno, _("%s: error opening password file"),
+                   password_list);
+        }
+
+      struct string pw = DS_EMPTY_INITIALIZER;
+      unsigned int target = 100000;
+      for (unsigned int i = 0; ; i++)
+        {
+          ds_clear (&pw);
+          if (!ds_read_line (&pw, password_file, SIZE_MAX))
+            {
+              if (isatty (STDOUT_FILENO))
+                {
+                  putchar ('\r');
+                  fflush (stdout);
+                }
+              error (1, 0, _("\n%s: password not in file"), password_list);
+            }
+          ds_chomp_byte (&pw, '\n');
+
+          if (i >= target)
+            {
+              target += 100000;
+              if (isatty (STDOUT_FILENO))
+                {
+                  printf ("\r%u", i);
+                  fflush (stdout);
+                }
+            }
+
+          if (encrypted_file_unlock__ (enc, ds_cstr (&pw)))
+            {
+              printf ("\npassword is: \"%s\"\n", ds_cstr (&pw));
+              password = ds_cstr (&pw);
+              break;
+            }
+        }
+      /* Close the list, except for stdin, which we did not open. */
+      if (password_file != stdin)
+        fclose (password_file);
+    }
+  else if (alphabet[0] && max_length)
     {
       size_t alphabet_size = strlen (alphabet);
       char *pw = xmalloc (max_length + 1);
@@ -404,12 +583,27 @@ The desired format of OUTPUT is by default inferred from its extension:\n\
   por           SPSS portable file\n\
   sps           SPSS syntax file (encrypted syntax input files only)\n\
 \n\
-Options:\n\
+General options:\n\
   -O, --output-format=FORMAT  set specific output format, where FORMAT\n\
                       is one of the extensions listed above\n\
   -e, --encoding=CHARSET  override encoding of input data file\n\
   -c MAXCASES         limit number of cases to copy (default is all cases)\n\
-  -p PASSWORD         password for encrypted files\n\
+  -k, --keep=VAR...   include only the given variables in output\n\
+  -d, --drop=VAR...   drop the given variables from output\n\
+CSV output options:\n\
+  --recode            convert user-missing values to system-missing\n\
+  --no-var-names      do not include variable names as first row\n\
+  --labels            write value labels to output\n\
+  --print-formats     honor variables' print formats\n\
+  --decimal=CHAR      use CHAR as the decimal point (default: .)\n\
+  --delimiter=CHAR    use CHAR to separate fields (default: ,)\n\
+  --qualifier=CHAR    use CHAR to quote the delimiter (default: \")\n\
+Password options (for use with encrypted files):\n\
+  -p PASSWORD         individual password\n\
+  -a ALPHABET         with -l, alphabet of passwords to try\n\
+  -l MAX-LENGTH       with -a, maximum number of characters to try\n\
+  --password-list=FILE  try all of the passwords in FILE (one per line)\n\
+Other options:\n\
   --help              display this help and exit\n\
   --version           output version information and exit\n",
           program_name, program_name);