1 /* PSPP - a program for statistical analysis.
2 Copyright (C) 2013, 2014, 2015, 2016 Free Software Foundation, Inc.
4 This program is free software: you can redistribute it and/or modify
5 it under the terms of the GNU General Public License as published by
6 the Free Software Foundation, either version 3 of the License, or
7 (at your option) any later version.
9 This program is distributed in the hope that it will be useful,
10 but WITHOUT ANY WARRANTY; without even the implied warranty of
11 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 GNU General Public License for more details.
14 You should have received a copy of the GNU General Public License
15 along with this program. If not, see <http://www.gnu.org/licenses/>. */
25 #include "data/any-reader.h"
26 #include "data/case-map.h"
27 #include "data/casereader.h"
28 #include "data/casewriter.h"
29 #include "data/csv-file-writer.h"
30 #include "data/dictionary.h"
31 #include "data/encrypted-file.h"
32 #include "data/file-name.h"
33 #include "data/por-file-writer.h"
34 #include "data/settings.h"
35 #include "data/sys-file-writer.h"
36 #include "data/file-handle-def.h"
37 #include "language/command.h"
38 #include "language/lexer/lexer.h"
39 #include "language/lexer/variable-parser.h"
40 #include "libpspp/assertion.h"
41 #include "libpspp/cast.h"
42 #include "libpspp/i18n.h"
45 #include "gl/getpass.h"
46 #include "gl/localcharset.h"
47 #include "gl/progname.h"
48 #include "gl/version-etc.h"
51 #define _(msgid) gettext (msgid)
/* Forward declarations.  decrypt_file() is called from main() and defined
   after it; in the definition the two file_handle parameters are named IFH
   and OFH rather than INPUT_FILENAME and OUTPUT_FILENAME.

   NOTE(review): the definition also takes a PASSWORD parameter between the
   output handle and ALPHABET; the matching prototype line is not visible in
   this excerpt -- confirm the two agree. */
53 static void usage (void);
55 static bool decrypt_file (struct encrypted_file *enc,
56 const struct file_handle *input_filename,
57 const struct file_handle *output_filename,
59 const char *alphabet, int max_length,
60 const char *password_list);
/* Validates ARG, the argument given to the command-line option named
   OPTION_NAME: it must be exactly one byte long, otherwise a fatal error that
   names OPTION_NAME is reported through error().

   OUT is where the caller wants the character; main() passes the address of
   the relevant csv_opts member.  NOTE(review): the statement that stores into
   *OUT is not visible in this excerpt -- confirm. */
63 parse_character_option (const char *arg, const char *option_name, char *out)
65 if (strlen (arg) != 1)
67 /* XXX support multibyte characters */
68 error (1, 0, _("%s argument must be a single character"), option_name);
/* Parses ARG as a list of variables from DICT, using a temporary lexer that
   reads ARG as a string in the locale's character set.  parse_variables()
   fills in *VARS and *N_VARS.

   If the variables parse successfully but the lexer is then positioned on
   anything other than T_STOP or T_ENDCMD, the leftover input is reported as a
   syntax error.

   NOTE(review): the return statement and lexer cleanup are not visible in
   this excerpt; main() treats the result as a success flag. */
74 parse_variables_option (const char *arg, struct dictionary *dict,
75 struct variable ***vars, size_t *n_vars)
77 struct lexer *lexer = lex_create ();
78 lex_append (lexer, lex_reader_for_string (arg, locale_charset ()));
81 bool ok = parse_variables (lexer, dict, vars, n_vars, 0);
/* Trailing tokens after the variable list make the argument invalid. */
82 if (ok && (lex_token (lexer) != T_STOP && lex_token (lexer) != T_ENDCMD))
84 lex_error (lexer, _("Syntax error expecting variable name."));
/* Entry point.  Parses the command-line options, takes the two remaining
   arguments as the input and output file names, and converts the input to the
   requested output format.

   An input that encrypted_file_open() recognizes is handed to decrypt_file().
   Any other input is opened with any_reader_open_and_decode(), optionally
   reduced with the keep/drop variable lists, and copied case by case to a CSV,
   system-file, or portable-file writer.

   NOTE(review): this excerpt omits some lines of the function (braces, several
   `case' labels, and the exit paths), so the comments below describe only the
   statements that are visible. */
99 main (int argc, char *argv[])
101 const char *input_filename;
102 const char *output_filename;
/* Upper bound on the number of cases copied by the loop near the end. */
104 long long int max_cases = LLONG_MAX;
105 const char *keep = NULL;
106 const char *drop = NULL;
107 struct dictionary *dict = NULL;
108 struct casereader *reader = NULL;
109 struct file_handle *input_fh = NULL;
110 const char *encoding = NULL;
111 struct encrypted_file *enc;
113 const char *output_format = NULL;
114 struct file_handle *output_fh = NULL;
115 struct casewriter *writer;
116 const char *password = NULL;
117 struct string alphabet = DS_EMPTY_INITIALIZER;
118 const char *password_list = NULL;
/* CSV writer settings, adjusted by the CSV-specific options parsed below. */
121 struct csv_writer_options csv_opts = {
122 .include_var_names = true,
124 .delimiter = 0, /* The default will be set later. */
130 set_program_name (argv[0]);
/* OPT_PASSWORD_LIST is placed above UCHAR_MAX, outside the range of the
   one-character short option codes. */
139 OPT_PASSWORD_LIST = UCHAR_MAX + 1,
148 static const struct option long_options[] =
150 { "cases", required_argument, NULL, 'c' },
151 { "keep", required_argument, NULL, 'k' },
152 { "drop", required_argument, NULL, 'd' },
153 { "encoding", required_argument, NULL, 'e' },
155 { "recode", no_argument, NULL, OPT_RECODE },
156 { "no-var-names", no_argument, NULL, OPT_NO_VAR_NAMES },
157 { "labels", no_argument, NULL, OPT_LABELS },
158 { "print-formats", no_argument, NULL, OPT_PRINT_FORMATS },
159 { "decimal", required_argument, NULL, OPT_DECIMAL },
160 { "delimiter", required_argument, NULL, OPT_DELIMITER },
161 { "qualifier", required_argument, NULL, OPT_QUALIFIER },
163 { "password", required_argument, NULL, 'p' },
164 { "password-alphabet", required_argument, NULL, 'a' },
165 { "password-length", required_argument, NULL, 'l' },
166 { "password-list", required_argument, NULL, OPT_PASSWORD_LIST },
168 { "output-format", required_argument, NULL, 'O' },
170 { "help", no_argument, NULL, 'h' },
171 { "version", no_argument, NULL, 'v' },
172 { NULL, 0, NULL, 0 },
177 c = getopt_long (argc, argv, "c:k:d:e:p:a:l:O:hv", long_options, NULL);
/* NOTE(review): the strtoull() result is stored without any validation and is
   converted from unsigned long long to the signed type of MAX_CASES. */
184 max_cases = strtoull (optarg, NULL, 0);
/* NOTE(review): atoi() gives no error indication for non-numeric input. */
204 length = atoi (optarg);
207 case OPT_PASSWORD_LIST:
208 password_list = optarg;
212 csv_opts.recode_user_missing = true;
215 case OPT_NO_VAR_NAMES:
216 csv_opts.include_var_names = false;
220 csv_opts.use_value_labels = true;
/* NOTE(review): "print-formats" is listed in long_options, but no statement
   handling OPT_PRINT_FORMATS is visible in this excerpt -- confirm that the
   option is actually acted on. */
224 parse_character_option (optarg, "--decimal", &csv_opts.decimal);
228 parse_character_option (optarg, "--delimiter", &csv_opts.delimiter);
232 parse_character_option (optarg, "--qualifier", &csv_opts.qualifier);
/* Build the password alphabet.  A range written as X-Y with Y greater than X
   contributes every byte from X through Y; any other byte is appended
   literally. */
236 for (const char *p = optarg; *p;)
237 if (p[1] == '-' && p[2] > p[0])
239 for (int ch = p[0]; ch <= p[2]; ch++)
240 ds_put_byte (&alphabet, ch);
244 ds_put_byte (&alphabet, *p++);
248 output_format = optarg;
252 version_etc (stdout, "pspp-convert", PACKAGE_NAME, PACKAGE_VERSION,
253 "Ben Pfaff", "John Darrington", NULL_SENTINEL);
/* Exactly two arguments, INPUT and OUTPUT, must remain after the options. */
265 if (optind + 2 != argc)
266 error (1, 0, _("exactly two non-option arguments are required; "
267 "use --help for help"));
269 input_filename = argv[optind];
270 output_filename = argv[optind + 1];
271 input_fh = fh_create_file (NULL, input_filename, NULL, fh_default_properties ());
/* Without an explicit output format, use whatever follows the last '.' in the
   output file name. */
273 if (output_format == NULL)
275 const char *dot = strrchr (output_filename, '.');
277 error (1, 0, _("%s: cannot guess output format (use -O option)"),
280 output_format = dot + 1;
283 output_fh = fh_create_file (NULL, output_filename, NULL, fh_default_properties ());
/* A positive result from encrypted_file_open() selects the decryption path. */
284 if (encrypted_file_open (&enc, input_fh) > 0)
286 if (decrypt_file (enc, input_fh, output_fh, password,
287 ds_cstr (&alphabet), length, password_list))
/* Ordinary data file: open it and obtain its dictionary in DICT. */
294 reader = any_reader_open_and_decode (input_fh, encoding, &dict, NULL);
/* The case map stage is created before DICT is modified and is turned into an
   input translator for READER once the keep/drop edits are done. */
298 struct case_map_stage *stage = case_map_stage_create (dict);
/* Keep list: reorder DICT by the listed variables, then delete every variable
   from index N_KEEP_VARS onward. */
301 struct variable **keep_vars;
303 if (!parse_variables_option (keep, dict, &keep_vars, &n_keep_vars))
305 dict_reorder_vars (dict, keep_vars, n_keep_vars);
306 dict_delete_consecutive_vars (dict, n_keep_vars,
307 dict_get_n_vars (dict) - n_keep_vars);
/* Drop list: delete the listed variables from DICT. */
313 struct variable **drop_vars;
315 if (!parse_variables_option (drop, dict, &drop_vars, &n_drop_vars))
317 dict_delete_vars (dict, drop_vars, n_drop_vars);
321 reader = case_map_create_input_translator (
322 case_map_stage_get_case_map (stage), reader);
323 case_map_stage_destroy (stage);
/* Pick the writer that matches the output format name. */
325 if (!strcmp (output_format, "csv") || !strcmp (output_format, "txt"))
/* An unset delimiter becomes ',' when the decimal point is '.', and ';'
   otherwise. */
327 if (!csv_opts.delimiter)
328 csv_opts.delimiter = csv_opts.decimal == '.' ? ',' : ';';
329 writer = csv_writer_open (output_fh, dict, &csv_opts);
331 else if (!strcmp (output_format, "sav") || !strcmp (output_format, "sys"))
333 struct sfm_write_options options;
335 options = sfm_writer_default_options ();
336 writer = sfm_open_writer (output_fh, dict, options);
338 else if (!strcmp (output_format, "por"))
340 struct pfm_write_options options;
342 options = pfm_writer_default_options ();
343 writer = pfm_open_writer (output_fh, dict, options);
347 error (1, 0, _("%s: unknown output format (use -O option)"),
352 error (1, 0, _("%s: error opening output file"), output_filename);
/* Copy cases from READER to WRITER, stopping after MAX_CASES at the latest. */
354 for (i = 0; i < max_cases; i++)
358 c = casereader_read (reader);
362 casewriter_write (writer, c);
/* A false result from either destroy call is reported as an I/O error on the
   corresponding file. */
365 if (!casereader_destroy (reader))
366 error (1, 0, _("%s: error reading input file"), input_filename);
367 if (!casewriter_destroy (writer))
368 error (1, 0, _("%s: error writing output file"), output_filename);
371 ds_destroy (&alphabet);
373 fh_unref (output_fh);
/* NOTE(review): second cleanup sequence; the label or branch that leads here
   is not visible in this excerpt (presumably the failure exit). */
381 casereader_destroy (reader);
382 ds_destroy (&alphabet);
384 fh_unref (output_fh);
393 decrypt_file (struct encrypted_file *enc,
394 const struct file_handle *ifh,
395 const struct file_handle *ofh,
396 const char *password,
397 const char *alphabet,
399 const char *password_list)
403 const char *input_filename = fh_get_file_name (ifh);
404 const char *output_filename = fh_get_file_name (ofh);
409 if (!strcmp (password_list, "-"))
410 password_file = stdin;
413 password_file = fopen (password_list, "r");
415 error (1, errno, _("%s: error opening password file"),
419 struct string pw = DS_EMPTY_INITIALIZER;
420 unsigned int target = 100000;
421 for (unsigned int i = 0; ; i++)
424 if (!ds_read_line (&pw, password_file, SIZE_MAX))
426 if (isatty (STDOUT_FILENO))
431 error (1, 0, _("\n%s: password not in file"), password_list);
433 ds_chomp_byte (&pw, '\n');
438 if (isatty (STDOUT_FILENO))
445 if (encrypted_file_unlock__ (enc, ds_cstr (&pw)))
447 printf ("\npassword is: \"%s\"\n", ds_cstr (&pw));
448 password = ds_cstr (&pw);
453 else if (alphabet[0] && max_length)
455 size_t alphabet_size = strlen (alphabet);
456 char *pw = xmalloc (max_length + 1);
457 int *indexes = xzalloc (max_length * sizeof *indexes);
459 for (int len = password ? strlen (password) : 0;
460 len <= max_length; len++)
462 if (password && len == strlen (password))
464 for (int i = 0; i < len; i++)
466 const char *p = strchr (alphabet, password[i]);
468 error (1, 0, _("%s: '%c' is not in alphabet"),
469 password, password[i]);
470 indexes[i] = p - alphabet;
476 memset (indexes, 0, len * sizeof *indexes);
477 for (int i = 0; i < len; i++)
482 unsigned int target = 0;
483 for (unsigned int j = 0; ; j++)
488 if (isatty (STDOUT_FILENO))
490 printf ("\rlength %d: %s", len, pw);
494 if (encrypted_file_unlock__ (enc, pw))
496 printf ("\npassword is: \"%s\"\n", pw);
502 for (i = 0; i < len; i++)
503 if (++indexes[i] < alphabet_size)
505 pw[i] = alphabet[indexes[i]];
511 pw[i] = alphabet[indexes[i]];
524 if (password == NULL)
526 password = getpass ("password: ");
527 if (password == NULL)
531 if (!encrypted_file_unlock (enc, password))
532 error (1, 0, _("sorry, wrong password"));
535 out = fn_open (ofh, "wb");
537 error (1, errno, ("%s: error opening output file"), output_filename);
541 uint8_t buffer[1024];
544 n = encrypted_file_read (enc, buffer, sizeof buffer);
548 if (fwrite (buffer, 1, n, out) != n)
549 error (1, errno, ("%s: write error"), output_filename);
552 err = encrypted_file_close (enc);
554 error (1, err, ("%s: read error"), input_filename);
556 if (fflush (out) == EOF)
557 error (1, errno, ("%s: write error"), output_filename);
567 %s, a utility for converting SPSS data files to other formats.\n\
568 Usage: %s [OPTION]... INPUT OUTPUT\n\
569 where INPUT is an SPSS data file or encrypted syntax file\n\
570 and OUTPUT is the name of the desired output file.\n\
572 The desired format of OUTPUT is by default inferred from its extension:\n\
573 csv txt comma-separated value\n\
574 sav sys SPSS system file\n\
575 por SPSS portable file\n\
576 sps SPSS syntax file (encrypted syntax input files only)\n\
579 -O, --output-format=FORMAT set specific output format, where FORMAT\n\
580 is one of the extensions listed above\n\
581 -e, --encoding=CHARSET override encoding of input data file\n\
582 -c MAXCASES limit number of cases to copy (default is all cases)\n\
583 -k, --keep=VAR... include only the given variables in output\n\
584 -d, --drop=VAR... drop the given variables from output\n\
585 CSV output options:\n\
586 --recode convert user-missing values to system-missing\n\
587 --no-var-names do not include variable names as first row\n\
588 --labels write value labels to output\n\
589 --print-formats honor variables' print formats\n\
590 --decimal=CHAR use CHAR as the decimal point (default: .)\n\
591 --delimiter=CHAR use CHAR to separate fields (default: ,)\n\
592 --qualifier=CHAR use CHAR to quote the delimiter (default: \")\n\
593 Password options (for use with encrypted files):\n\
594 -p PASSWORD individual password\n\
595 -a ALPHABET with -l, alphabet of passwords to try\n\
596 -l MAX-LENGTH with -a, maximum number of characters to try\n\
597 --password-list=FILE try all of the passwords in FILE (one per line)\n\
599 --help display this help and exit\n\
600 --version output version information and exit\n",
601 program_name, program_name);