1 /* PSPP - a program for statistical analysis.
2 Copyright (C) 2013, 2014, 2015, 2016 Free Software Foundation, Inc.
4 This program is free software: you can redistribute it and/or modify
5 it under the terms of the GNU General Public License as published by
6 the Free Software Foundation, either version 3 of the License, or
7 (at your option) any later version.
9 This program is distributed in the hope that it will be useful,
10 but WITHOUT ANY WARRANTY; without even the implied warranty of
11 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 GNU General Public License for more details.
14 You should have received a copy of the GNU General Public License
15 along with this program. If not, see <http://www.gnu.org/licenses/>. */
25 #include "data/any-reader.h"
26 #include "data/casereader.h"
27 #include "data/casewriter.h"
28 #include "data/csv-file-writer.h"
29 #include "data/dictionary.h"
30 #include "data/encrypted-file.h"
31 #include "data/file-name.h"
32 #include "data/por-file-writer.h"
33 #include "data/settings.h"
34 #include "data/sys-file-writer.h"
35 #include "data/file-handle-def.h"
36 #include "language/command.h"
37 #include "language/lexer/lexer.h"
38 #include "language/lexer/variable-parser.h"
39 #include "libpspp/assertion.h"
40 #include "libpspp/cast.h"
41 #include "libpspp/i18n.h"
44 #include "gl/getpass.h"
45 #include "gl/localcharset.h"
46 #include "gl/progname.h"
47 #include "gl/version-etc.h"
/* Shorthand: _("text") expands to gettext ("text"), so messages written
   this way go through the message catalog lookup. */
50 #define _(msgid) gettext (msgid)
/* Forward declarations.  decrypt_file() is called from main() before its
   definition appears further down in this file.
   NOTE(review): the definition of decrypt_file() also takes a password
   argument; confirm that this prototype lists the same parameters. */
52 static void usage (void);
54 static bool decrypt_file (struct encrypted_file *enc,
55 const struct file_handle *input_filename,
56 const struct file_handle *output_filename,
58 const char *alphabet, int max_length,
59 const char *password_list);
/* Checks ARG, the argument given to command-line option OPTION_NAME.
   Unless ARG is exactly one byte long, a fatal error naming OPTION_NAME is
   reported through error (1, ...).
   NOTE(review): the statement that stores the accepted character through
   OUT is not among the lines visible here; confirm against the full
   file. */
62 parse_character_option (const char *arg, const char *option_name, char *out)
64 if (strlen (arg) != 1)
66 /* XXX support multibyte characters */
67 error (1, 0, _("%s argument must be a single character"), option_name);
/* Parses ARG, the text of a variable-list option, against DICT.
   A temporary lexer is fed ARG (interpreted in the locale's character set)
   and parse_variables() fills in *VARS and *N_VARS.
   NOTE(review): callers test the result as a boolean; the return statement
   and the lexer cleanup are not visible here, so confirm them in the full
   file. */
73 parse_variables_option (const char *arg, struct dictionary *dict,
74 struct variable ***vars, size_t *n_vars)
76 struct lexer *lexer = lex_create ();
77 lex_append (lexer, lex_reader_for_string (arg, locale_charset ()));
80 bool ok = parse_variables (lexer, dict, vars, n_vars, 0);
/* A successful parse must also have consumed all of ARG: anything other
   than end of input or end of command at this point is leftover text. */
81 if (ok && (lex_token (lexer) != T_STOP && lex_token (lexer) != T_ENDCMD))
83 lex_error (lexer, _("expecting variable name"));
/* Entry point of pspp-convert.
   Parses the command-line options, requires exactly two non-option
   arguments (input and output file names), and then either decrypts the
   input via decrypt_file() or copies up to MAX_CASES cases from the input
   reader to a writer for the chosen output format (csv/txt, sav/sys or
   por).  The format comes from -O or, failing that, from the text after
   the last '.' in the output file name. */
98 main (int argc, char *argv[])
100 const char *input_filename;
101 const char *output_filename;
103 long long int max_cases = LLONG_MAX;
104 const char *keep = NULL;
105 const char *drop = NULL;
106 struct dictionary *dict = NULL;
107 struct casereader *reader = NULL;
108 struct file_handle *input_fh = NULL;
109 const char *encoding = NULL;
110 struct encrypted_file *enc;
112 const char *output_format = NULL;
113 struct file_handle *output_fh = NULL;
114 struct casewriter *writer;
115 const char *password = NULL;
116 struct string alphabet = DS_EMPTY_INITIALIZER;
117 const char *password_list = NULL;
120 struct csv_writer_options csv_opts = {
121 .include_var_names = true,
123 .delimiter = 0, /* The default will be set later. */
129 set_program_name (argv[0]);
138 OPT_PASSWORD_LIST = UCHAR_MAX + 1,
147 static const struct option long_options[] =
149 { "cases", required_argument, NULL, 'c' },
150 { "keep", required_argument, NULL, 'k' },
151 { "drop", required_argument, NULL, 'd' },
152 { "encoding", required_argument, NULL, 'e' },
154 { "recode", no_argument, NULL, OPT_RECODE },
155 { "no-var-names", no_argument, NULL, OPT_NO_VAR_NAMES },
156 { "labels", no_argument, NULL, OPT_LABELS },
157 { "print-formats", no_argument, NULL, OPT_PRINT_FORMATS },
158 { "decimal", required_argument, NULL, OPT_DECIMAL },
159 { "delimiter", required_argument, NULL, OPT_DELIMITER },
160 { "qualifier", required_argument, NULL, OPT_QUALIFIER },
162 { "password", required_argument, NULL, 'p' },
163 { "password-alphabet", required_argument, NULL, 'a' },
164 { "password-length", required_argument, NULL, 'l' },
165 { "password-list", required_argument, NULL, OPT_PASSWORD_LIST },
167 { "output-format", required_argument, NULL, 'O' },
169 { "help", no_argument, NULL, 'h' },
170 { "version", no_argument, NULL, 'v' },
171 { NULL, 0, NULL, 0 },
176 c = getopt_long (argc, argv, "c:k:d:e:p:a:l:O:hv", long_options, NULL);
/* max_cases is a signed long long, so parse it with strtoll: a count that
   is too large then saturates at LLONG_MAX instead of wrapping around to a
   negative value as the unsigned conversion did. */
183 max_cases = strtoll (optarg, NULL, 0);
/* NOTE(review): atoi() cannot report malformed or out-of-range input;
   consider strtol() with an end-pointer check. */
203 length = atoi (optarg);
206 case OPT_PASSWORD_LIST:
207 password_list = optarg;
211 csv_opts.recode_user_missing = true;
214 case OPT_NO_VAR_NAMES:
215 csv_opts.include_var_names = false;
219 csv_opts.use_value_labels = true;
223 parse_character_option (optarg, "--decimal", &csv_opts.decimal);
227 parse_character_option (optarg, "--delimiter", &csv_opts.delimiter);
231 parse_character_option (optarg, "--qualifier", &csv_opts.qualifier);
/* Build the brute-force alphabet.  A range written X-Y, with Y greater
   than X, contributes every byte from X through Y; any other byte is
   appended as is. */
235 for (const char *p = optarg; *p;)
236 if (p[1] == '-' && p[2] > p[0])
238 for (int ch = p[0]; ch <= p[2]; ch++)
239 ds_put_byte (&alphabet, ch);
243 ds_put_byte (&alphabet, *p++);
247 output_format = optarg;
251 version_etc (stdout, "pspp-convert", PACKAGE_NAME, PACKAGE_VERSION,
252 "Ben Pfaff", "John Darrington", NULL_SENTINEL);
264 if (optind + 2 != argc)
265 error (1, 0, _("exactly two non-option arguments are required; "
266 "use --help for help"));
268 input_filename = argv[optind];
269 output_filename = argv[optind + 1];
270 input_fh = fh_create_file (NULL, input_filename, NULL, fh_default_properties ());
/* Without -O, the output format is whatever follows the last '.' in the
   output file name. */
272 if (output_format == NULL)
274 const char *dot = strrchr (output_filename, '.');
276 error (1, 0, _("%s: cannot guess output format (use -O option)"),
279 output_format = dot + 1;
282 output_fh = fh_create_file (NULL, output_filename, NULL, fh_default_properties ());
/* A positive result from encrypted_file_open() routes the input through
   decrypt_file() rather than the case-copying path below. */
283 if (encrypted_file_open (&enc, input_fh) > 0)
285 if (decrypt_file (enc, input_fh, output_fh, password,
286 ds_cstr (&alphabet), length, password_list))
293 reader = any_reader_open_and_decode (input_fh, encoding, &dict, NULL);
/* --keep: reorder the dictionary using the kept variables, then delete the
   variables from index n_keep_vars onward.  Presumably the reorder puts
   the kept variables first; confirm against dict_reorder_vars(). */
299 struct variable **keep_vars;
301 if (!parse_variables_option (keep, dict, &keep_vars, &n_keep_vars))
303 dict_reorder_vars (dict, keep_vars, n_keep_vars);
304 dict_delete_consecutive_vars (dict, n_keep_vars,
305 dict_get_var_cnt (dict) - n_keep_vars);
/* --drop: delete the listed variables from the dictionary. */
311 struct variable **drop_vars;
313 if (!parse_variables_option (drop, dict, &drop_vars, &n_drop_vars))
315 dict_delete_vars (dict, drop_vars, n_drop_vars);
/* Open the writer that matches the output format. */
319 if (!strcmp (output_format, "csv") || !strcmp (output_format, "txt"))
/* If no delimiter was given, use ',' when the decimal point is '.', and
   ';' otherwise so the two never coincide by default. */
321 if (!csv_opts.delimiter)
322 csv_opts.delimiter = csv_opts.decimal == '.' ? ',' : ';';
323 writer = csv_writer_open (output_fh, dict, &csv_opts);
325 else if (!strcmp (output_format, "sav") || !strcmp (output_format, "sys"))
327 struct sfm_write_options options;
329 options = sfm_writer_default_options ();
330 writer = sfm_open_writer (output_fh, dict, options);
332 else if (!strcmp (output_format, "por"))
334 struct pfm_write_options options;
336 options = pfm_writer_default_options ();
337 writer = pfm_open_writer (output_fh, dict, options);
341 error (1, 0, _("%s: unknown output format (use -O option)"),
346 error (1, 0, _("%s: error opening output file"), output_filename);
/* Copy cases from reader to writer, stopping after max_cases of them. */
348 for (i = 0; i < max_cases; i++)
352 c = casereader_read (reader);
356 casewriter_write (writer, c);
/* Destroying the reader and the writer also reports whether any read or
   write error occurred along the way. */
359 if (!casereader_destroy (reader))
360 error (1, 0, _("%s: error reading input file"), input_filename);
361 if (!casewriter_destroy (writer))
362 error (1, 0, _("%s: error writing output file"), output_filename);
/* Two cleanup sequences follow.  NOTE(review): presumably one for the
   normal exit and one for the error exit; the labels and return statements
   are not visible here. */
365 ds_destroy (&alphabet);
367 fh_unref (output_fh);
375 casereader_destroy (reader);
376 ds_destroy (&alphabet);
378 fh_unref (output_fh);
387 decrypt_file (struct encrypted_file *enc,
388 const struct file_handle *ifh,
389 const struct file_handle *ofh,
390 const char *password,
391 const char *alphabet,
393 const char *password_list)
397 const char *input_filename = fh_get_file_name (ifh);
398 const char *output_filename = fh_get_file_name (ofh);
403 if (!strcmp (password_list, "-"))
404 password_file = stdin;
407 password_file = fopen (password_list, "r");
409 error (1, errno, _("%s: error opening password file"),
413 struct string pw = DS_EMPTY_INITIALIZER;
414 unsigned int target = 100000;
415 for (unsigned int i = 0; ; i++)
418 if (!ds_read_line (&pw, password_file, SIZE_MAX))
420 if (isatty (STDOUT_FILENO))
425 error (1, 0, _("\n%s: password not in file"), password_list);
427 ds_chomp_byte (&pw, '\n');
432 if (isatty (STDOUT_FILENO))
439 if (encrypted_file_unlock__ (enc, ds_cstr (&pw)))
441 printf ("\npassword is: \"%s\"\n", ds_cstr (&pw));
442 password = ds_cstr (&pw);
447 else if (alphabet[0] && max_length)
449 size_t alphabet_size = strlen (alphabet);
450 char *pw = xmalloc (max_length + 1);
451 int *indexes = xzalloc (max_length * sizeof *indexes);
453 for (int len = password ? strlen (password) : 0;
454 len <= max_length; len++)
456 if (password && len == strlen (password))
458 for (int i = 0; i < len; i++)
460 const char *p = strchr (alphabet, password[i]);
462 error (1, 0, _("%s: '%c' is not in alphabet"),
463 password, password[i]);
464 indexes[i] = p - alphabet;
470 memset (indexes, 0, len * sizeof *indexes);
471 for (int i = 0; i < len; i++)
476 unsigned int target = 0;
477 for (unsigned int j = 0; ; j++)
482 if (isatty (STDOUT_FILENO))
484 printf ("\rlength %d: %s", len, pw);
488 if (encrypted_file_unlock__ (enc, pw))
490 printf ("\npassword is: \"%s\"\n", pw);
496 for (i = 0; i < len; i++)
497 if (++indexes[i] < alphabet_size)
499 pw[i] = alphabet[indexes[i]];
505 pw[i] = alphabet[indexes[i]];
518 if (password == NULL)
520 password = getpass ("password: ");
521 if (password == NULL)
525 if (!encrypted_file_unlock (enc, password))
526 error (1, 0, _("sorry, wrong password"));
529 out = fn_open (ofh, "wb");
531 error (1, errno, ("%s: error opening output file"), output_filename);
535 uint8_t buffer[1024];
538 n = encrypted_file_read (enc, buffer, sizeof buffer);
542 if (fwrite (buffer, 1, n, out) != n)
543 error (1, errno, ("%s: write error"), output_filename);
546 err = encrypted_file_close (enc);
548 error (1, err, ("%s: read error"), input_filename);
550 if (fflush (out) == EOF)
551 error (1, errno, ("%s: write error"), output_filename);
561 %s, a utility for converting SPSS data files to other formats.\n\
562 Usage: %s [OPTION]... INPUT OUTPUT\n\
563 where INPUT is an SPSS data file or encrypted syntax file\n\
564 and OUTPUT is the name of the desired output file.\n\
566 The desired format of OUTPUT is by default inferred from its extension:\n\
567 csv txt comma-separated value\n\
568 sav sys SPSS system file\n\
569 por SPSS portable file\n\
570 sps SPSS syntax file (encrypted syntax input files only)\n\
573 -O, --output-format=FORMAT set specific output format, where FORMAT\n\
574 is one of the extensions listed above\n\
575 -e, --encoding=CHARSET override encoding of input data file\n\
576 -c MAXCASES limit number of cases to copy (default is all cases)\n\
577 -k, --keep=VAR... include only the given variables in output\n\
578 -d, --drop=VAR... drop the given variables from output\n\
579 CSV output options:\n\
580 --recode convert user-missing values to system-missing\n\
581 --no-var-names do not include variable names as first row\n\
582 --labels write value labels to output\n\
583 --print-formats honor variables' print formats\n\
584 --decimal=CHAR use CHAR as the decimal point (default: .)\n\
585 --delimiter=CHAR use CHAR to separate fields (default: ,)\n\
586 --qualifier=CHAR use CHAR to quote the delimiter (default: \")\n\
587 Password options (for use with encrypted files):\n\
588 -p PASSWORD individual password\n\
589 -a ALPHABET with -l, alphabet of passwords to try\n\
590 -l MAX-LENGTH with -a, maximum number of characters to try\n\
591 --password-list=FILE try all of the passwords in FILE (one per line)\n\
593 --help display this help and exit\n\
594 --version output version information and exit\n",
595 program_name, program_name);
/* Both %s conversions at the top of the help text receive program_name. */