1 /* PSPP - a program for statistical analysis.
2 Copyright (C) 2013, 2014, 2015, 2016 Free Software Foundation, Inc.
4 This program is free software: you can redistribute it and/or modify
5 it under the terms of the GNU General Public License as published by
6 the Free Software Foundation, either version 3 of the License, or
7 (at your option) any later version.
9 This program is distributed in the hope that it will be useful,
10 but WITHOUT ANY WARRANTY; without even the implied warranty of
11 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 GNU General Public License for more details.
14 You should have received a copy of the GNU General Public License
15 along with this program. If not, see <http://www.gnu.org/licenses/>. */
25 #include "data/any-reader.h"
26 #include "data/case-map.h"
27 #include "data/casereader.h"
28 #include "data/casewriter.h"
29 #include "data/csv-file-writer.h"
30 #include "data/dictionary.h"
31 #include "data/encrypted-file.h"
32 #include "data/file-name.h"
33 #include "data/por-file-writer.h"
34 #include "data/settings.h"
35 #include "data/sys-file-writer.h"
36 #include "data/file-handle-def.h"
37 #include "language/command.h"
38 #include "language/lexer/lexer.h"
39 #include "language/lexer/variable-parser.h"
40 #include "libpspp/assertion.h"
41 #include "libpspp/cast.h"
42 #include "libpspp/i18n.h"
45 #include "gl/getpass.h"
46 #include "gl/localcharset.h"
47 #include "gl/progname.h"
48 #include "gl/version-etc.h"
/* Translation shorthand: _(msgid) expands to gettext (msgid), so every
   message wrapped in _() is looked up in the message catalog at run time. */
51 #define _(msgid) gettext (msgid)
/* Forward declarations.  decrypt_file() is called from main() but defined
   after it, so it must be declared here. */
53 static void usage (void);
55 static bool decrypt_file (struct encrypted_file *enc,
56 const struct file_handle *input_filename,
57 const struct file_handle *output_filename,
59 const char *alphabet, int max_length,
60 const char *password_list);
/* Validates ARG, the argument supplied for the command-line option named
   OPTION_NAME.  Unless ARG is exactly one byte long, a fatal error naming
   OPTION_NAME is reported through error() with exit status 1.

   NOTE(review): the remainder of the body is not visible in this excerpt;
   presumably the single byte is then stored in *OUT -- confirm. */
63 parse_character_option (const char *arg, const char *option_name, char *out)
65 if (strlen (arg) != 1)
67 /* XXX support multibyte characters */
68 error (1, 0, _("%s argument must be a single character"), option_name);
/* Parses ARG as a list of variables in DICT.  ARG is fed to a temporary
   lexer, interpreted in the locale's character set, and handed to
   parse_variables(), which fills in *VARS and *N_VARS.

   NOTE(review): the lexer cleanup and the return statement are not visible
   in this excerpt; the callers in main() treat a false result as failure. */
74 parse_variables_option (const char *arg, struct dictionary *dict,
75 struct variable ***vars, size_t *n_vars)
77 struct lexer *lexer = lex_create ();
78 lex_append (lexer, lex_reader_for_string (arg, locale_charset ()));
81 bool ok = parse_variables (lexer, dict, vars, n_vars, 0);
/* A successful parse must have consumed all of ARG: any token other than
   end of input or end of command is reported as a syntax error. */
82 if (ok && (lex_token (lexer) != T_STOP && lex_token (lexer) != T_ENDCMD))
84 lex_error (lexer, _("Syntax error expecting variable name."));
/* Entry point of pspp-convert.

   Parses the command-line options, requires exactly two non-option
   arguments (the input and output file names), and then converts the input:

   - If encrypted_file_open() reports a positive result for the input, the
     file is handed to decrypt_file() together with the password-related
     options.

   - The input is opened with any_reader_open_and_decode(), the dictionary
     is adjusted according to --keep and --drop, and up to MAX_CASES cases
     are copied to a CSV, system-file, or portable-file writer chosen by the
     output format.

   The output format is the -O argument or, failing that, the text after the
   last '.' in the output file name. */
99 main (int argc, char *argv[])
101 const char *input_filename;
102 const char *output_filename;
104 long long int max_cases = LLONG_MAX;
105 const char *keep = NULL;
106 const char *drop = NULL;
107 struct dictionary *dict = NULL;
108 struct casereader *reader = NULL;
109 struct file_handle *input_fh = NULL;
110 const char *encoding = NULL;
111 struct encrypted_file *enc;
113 const char *output_format = NULL;
114 struct file_handle *output_fh = NULL;
115 struct casewriter *writer;
116 const char *password = NULL;
117 struct string alphabet = DS_EMPTY_INITIALIZER;
118 const char *password_list = NULL;
121 struct csv_writer_options csv_opts = {
122 .include_var_names = true,
124 .delimiter = 0, /* The default will be set later. */
130 set_program_name (argv[0]);
/* Codes for long options are numbered from UCHAR_MAX + 1 upward, which keeps
   them distinct from the single-character option codes. */
139 OPT_PASSWORD_LIST = UCHAR_MAX + 1,
148 static const struct option long_options[] =
150 { "cases", required_argument, NULL, 'c' },
151 { "keep", required_argument, NULL, 'k' },
152 { "drop", required_argument, NULL, 'd' },
153 { "encoding", required_argument, NULL, 'e' },
155 { "recode", no_argument, NULL, OPT_RECODE },
156 { "no-var-names", no_argument, NULL, OPT_NO_VAR_NAMES },
157 { "labels", no_argument, NULL, OPT_LABELS },
158 { "print-formats", no_argument, NULL, OPT_PRINT_FORMATS },
159 { "decimal", required_argument, NULL, OPT_DECIMAL },
160 { "delimiter", required_argument, NULL, OPT_DELIMITER },
161 { "qualifier", required_argument, NULL, OPT_QUALIFIER },
163 { "password", required_argument, NULL, 'p' },
164 { "password-alphabet", required_argument, NULL, 'a' },
165 { "password-length", required_argument, NULL, 'l' },
166 { "password-list", required_argument, NULL, OPT_PASSWORD_LIST },
168 { "output-format", required_argument, NULL, 'O' },
170 { "help", no_argument, NULL, 'h' },
171 { "version", no_argument, NULL, 'v' },
172 { NULL, 0, NULL, 0 },
177 c = getopt_long (argc, argv, "c:k:d:e:p:a:l:O:hv", long_options, NULL);
184 max_cases = strtoull (optarg, NULL, 0);
          {
            /* Validate the password length instead of using atoi(), which
               cannot report malformed input and lets negative values
               through.  decrypt_file() allocates max_length + 1 bytes, so
               INT_MAX itself is rejected as well. */
            char *tail;
            errno = 0;
            long int value = strtol (optarg, &tail, 10);
            if (tail == optarg || *tail != '\0' || errno == ERANGE
                || value < 0 || value >= INT_MAX)
              error (1, 0, _("%s: invalid password length"), optarg);
            length = value;
          }
207 case OPT_PASSWORD_LIST:
208 password_list = optarg;
212 csv_opts.recode_user_missing = true;
215 case OPT_NO_VAR_NAMES:
216 csv_opts.include_var_names = false;
220 csv_opts.use_value_labels = true;
224 parse_character_option (optarg, "--decimal", &csv_opts.decimal);
228 parse_character_option (optarg, "--delimiter", &csv_opts.delimiter);
232 parse_character_option (optarg, "--qualifier", &csv_opts.qualifier);
/* Build the password alphabet.  A run of the form "X-Y" with Y greater than
   X contributes every byte from X through Y; any other byte is added as
   is. */
236 for (const char *p = optarg; *p;)
237 if (p[1] == '-' && p[2] > p[0])
239 for (int ch = p[0]; ch <= p[2]; ch++)
240 ds_put_byte (&alphabet, ch);
244 ds_put_byte (&alphabet, *p++);
248 output_format = optarg;
252 version_etc (stdout, "pspp-convert", PACKAGE_NAME, PACKAGE_VERSION,
253 "Ben Pfaff", "John Darrington", NULL_SENTINEL);
/* Exactly two operands must remain after the options: INPUT and OUTPUT. */
265 if (optind + 2 != argc)
266 error (1, 0, _("exactly two non-option arguments are required; "
267 "use --help for help"));
269 input_filename = argv[optind];
270 output_filename = argv[optind + 1];
271 input_fh = fh_create_file (NULL, input_filename, NULL, fh_default_properties ());
/* Without -O, take the output format from the text that follows the last
   '.' in the output file name. */
273 if (output_format == NULL)
275 const char *dot = strrchr (output_filename, '.');
277 error (1, 0, _("%s: cannot guess output format (use -O option)"),
280 output_format = dot + 1;
283 output_fh = fh_create_file (NULL, output_filename, NULL, fh_default_properties ());
/* A positive result from encrypted_file_open() selects the decryption
   path. */
284 if (encrypted_file_open (&enc, input_fh) > 0)
286 if (decrypt_file (enc, input_fh, output_fh, password,
287 ds_cstr (&alphabet), length, password_list))
294 reader = any_reader_open_and_decode (input_fh, encoding, &dict, NULL);
/* Restrict DICT to the requested variables.  The case map stage created
   here is converted below into a translator wrapped around READER.
   NOTE(review): the conditions guarding the keep and drop sections are not
   visible in this excerpt. */
298 struct case_map_stage *stage = case_map_stage_create (dict);
301 struct variable **keep_vars;
303 if (!parse_variables_option (keep, dict, &keep_vars, &n_keep_vars))
305 dict_reorder_vars (dict, keep_vars, n_keep_vars);
306 dict_delete_consecutive_vars (dict, n_keep_vars,
307 dict_get_n_vars (dict) - n_keep_vars);
313 struct variable **drop_vars;
315 if (!parse_variables_option (drop, dict, &drop_vars, &n_drop_vars))
317 dict_delete_vars (dict, drop_vars, n_drop_vars);
321 reader = case_map_create_input_translator (
322 case_map_stage_to_case_map (stage), reader);
/* Open the writer that matches the output format. */
324 if (!strcmp (output_format, "csv") || !strcmp (output_format, "txt"))
/* When no delimiter was chosen, use ',' if the decimal point is '.' and ';'
   otherwise. */
326 if (!csv_opts.delimiter)
327 csv_opts.delimiter = csv_opts.decimal == '.' ? ',' : ';';
328 writer = csv_writer_open (output_fh, dict, &csv_opts);
330 else if (!strcmp (output_format, "sav") || !strcmp (output_format, "sys"))
332 struct sfm_write_options options;
334 options = sfm_writer_default_options ();
335 writer = sfm_open_writer (output_fh, dict, options);
337 else if (!strcmp (output_format, "por"))
339 struct pfm_write_options options;
341 options = pfm_writer_default_options ();
342 writer = pfm_open_writer (output_fh, dict, options);
346 error (1, 0, _("%s: unknown output format (use -O option)"),
351 error (1, 0, _("%s: error opening output file"), output_filename);
/* Copy cases from READER to WRITER, at most MAX_CASES of them. */
353 for (i = 0; i < max_cases; i++)
357 c = casereader_read (reader);
361 casewriter_write (writer, c);
/* Destroying the reader and the writer also reports whether an I/O error
   occurred on either side. */
364 if (!casereader_destroy (reader))
365 error (1, 0, _("%s: error reading input file"), input_filename);
366 if (!casewriter_destroy (writer))
367 error (1, 0, _("%s: error writing output file"), output_filename);
/* NOTE(review): two cleanup sequences follow; presumably the first belongs
   to the normal exit and the second to an error path -- the labels and
   return statements are not visible in this excerpt. */
370 ds_destroy (&alphabet);
372 fh_unref (output_fh);
380 casereader_destroy (reader);
381 ds_destroy (&alphabet);
383 fh_unref (output_fh);
392 decrypt_file (struct encrypted_file *enc,
393 const struct file_handle *ifh,
394 const struct file_handle *ofh,
395 const char *password,
396 const char *alphabet,
398 const char *password_list)
402 const char *input_filename = fh_get_file_name (ifh);
403 const char *output_filename = fh_get_file_name (ofh);
408 if (!strcmp (password_list, "-"))
409 password_file = stdin;
412 password_file = fopen (password_list, "r");
414 error (1, errno, _("%s: error opening password file"),
418 struct string pw = DS_EMPTY_INITIALIZER;
419 unsigned int target = 100000;
420 for (unsigned int i = 0; ; i++)
423 if (!ds_read_line (&pw, password_file, SIZE_MAX))
425 if (isatty (STDOUT_FILENO))
430 error (1, 0, _("\n%s: password not in file"), password_list);
432 ds_chomp_byte (&pw, '\n');
437 if (isatty (STDOUT_FILENO))
444 if (encrypted_file_unlock__ (enc, ds_cstr (&pw)))
446 printf ("\npassword is: \"%s\"\n", ds_cstr (&pw));
447 password = ds_cstr (&pw);
452 else if (alphabet[0] && max_length)
454 size_t alphabet_size = strlen (alphabet);
455 char *pw = xmalloc (max_length + 1);
456 int *indexes = xzalloc (max_length * sizeof *indexes);
458 for (int len = password ? strlen (password) : 0;
459 len <= max_length; len++)
461 if (password && len == strlen (password))
463 for (int i = 0; i < len; i++)
465 const char *p = strchr (alphabet, password[i]);
467 error (1, 0, _("%s: '%c' is not in alphabet"),
468 password, password[i]);
469 indexes[i] = p - alphabet;
475 memset (indexes, 0, len * sizeof *indexes);
476 for (int i = 0; i < len; i++)
481 unsigned int target = 0;
482 for (unsigned int j = 0; ; j++)
487 if (isatty (STDOUT_FILENO))
489 printf ("\rlength %d: %s", len, pw);
493 if (encrypted_file_unlock__ (enc, pw))
495 printf ("\npassword is: \"%s\"\n", pw);
501 for (i = 0; i < len; i++)
502 if (++indexes[i] < alphabet_size)
504 pw[i] = alphabet[indexes[i]];
510 pw[i] = alphabet[indexes[i]];
523 if (password == NULL)
525 password = getpass ("password: ");
526 if (password == NULL)
530 if (!encrypted_file_unlock (enc, password))
531 error (1, 0, _("sorry, wrong password"));
534 out = fn_open (ofh, "wb");
536 error (1, errno, ("%s: error opening output file"), output_filename);
540 uint8_t buffer[1024];
543 n = encrypted_file_read (enc, buffer, sizeof buffer);
547 if (fwrite (buffer, 1, n, out) != n)
548 error (1, errno, ("%s: write error"), output_filename);
551 err = encrypted_file_close (enc);
553 error (1, err, ("%s: read error"), input_filename);
555 if (fflush (out) == EOF)
556 error (1, errno, ("%s: write error"), output_filename);
566 %s, a utility for converting SPSS data files to other formats.\n\
567 Usage: %s [OPTION]... INPUT OUTPUT\n\
568 where INPUT is an SPSS data file or encrypted syntax file\n\
569 and OUTPUT is the name of the desired output file.\n\
571 The desired format of OUTPUT is by default inferred from its extension:\n\
572 csv txt comma-separated value\n\
573 sav sys SPSS system file\n\
574 por SPSS portable file\n\
575 sps SPSS syntax file (encrypted syntax input files only)\n\
578 -O, --output-format=FORMAT set specific output format, where FORMAT\n\
579 is one of the extensions listed above\n\
580 -e, --encoding=CHARSET override encoding of input data file\n\
581 -c, --cases=MAXCASES limit number of cases to copy (default is all cases)\n\
582 -k, --keep=VAR... include only the given variables in output\n\
583 -d, --drop=VAR... drop the given variables from output\n\
584 CSV output options:\n\
585 --recode convert user-missing values to system-missing\n\
586 --no-var-names do not include variable names as first row\n\
587 --labels write value labels to output\n\
588 --print-formats honor variables' print formats\n\
589 --decimal=CHAR use CHAR as the decimal point (default: .)\n\
590 --delimiter=CHAR use CHAR to separate fields (default: ,)\n\
591 --qualifier=CHAR use CHAR to quote the delimiter (default: \")\n\
592 Password options (for use with encrypted files):\n\
593 -p, --password=PASSWORD individual password\n\
594 -a, --password-alphabet=ALPHABET with -l, alphabet of passwords to try\n\
595 -l, --password-length=MAX-LENGTH with -a, maximum number of characters to try\n\
596 --password-list=FILE try all of the passwords in FILE (one per line)\n\
598 -h, --help display this help and exit\n\
599 -v, --version output version information and exit\n",
600 program_name, program_name);