1 /* PSPP - a program for statistical analysis.
2 Copyright (C) 2011, 2013 Free Software Foundation, Inc.
4 This program is free software: you can redistribute it and/or modify
5 it under the terms of the GNU General Public License as published by
6 the Free Software Foundation, either version 3 of the License, or
7 (at your option) any later version.
9 This program is distributed in the hope that it will be useful,
10 but WITHOUT ANY WARRANTY; without even the implied warranty of
11 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 GNU General Public License for more details.
14 You should have received a copy of the GNU General Public License
15 along with this program. If not, see <http://www.gnu.org/licenses/>. */
29 #include "libpspp/assertion.h"
30 #include "libpspp/compiler.h"
31 #include "libpspp/float-format.h"
32 #include "libpspp/integer-format.h"
34 #include "gl/c-ctype.h"
37 #include "gl/intprops.h"
38 #include "gl/progname.h"
39 #include "gl/xalloc.h"
48 static void buffer_put (struct buffer *, const void *, size_t);
49 static void *buffer_put_uninit (struct buffer *, size_t);
68 static enum token_type token;
69 static unsigned long long int tok_integer;
70 static double tok_float;
71 static char *tok_string;
72 static size_t tok_strlen, tok_allocated;
74 /* --be, --le: Integer and floating-point formats. */
75 static enum float_format float_format = FLOAT_IEEE_DOUBLE_BE;
76 static enum integer_format integer_format = INTEGER_MSB_FIRST;
78 /* Input file and current position. */
80 static const char *input_file_name;
81 static int line_number;
83 static void PRINTF_FORMAT (1, 2)
84 fatal (const char *message, ...)
88 fprintf (stderr, "%s:%d: ", input_file_name, line_number);
89 va_start (args, message);
90 vfprintf (stderr, message, args);
100 if (tok_strlen >= tok_allocated)
101 tok_string = x2realloc (tok_string, &tok_allocated);
103 tok_string[tok_strlen] = c;
123 while ((c = getc (input)) != '\n' && c != EOF)
129 while (isspace (c) || c == '<' || c == '>');
135 fatal ("unexpected end of input");
138 else if (isdigit (c) || c == '-')
147 while (isdigit (c) || isalpha (c) || c == '.');
152 if (strchr (tok_string, '.') == NULL)
155 tok_integer = strtoull (tok_string, &tail, 0);
160 tok_float = strtod (tok_string, &tail);
163 fatal ("invalid numeric syntax");
168 while ((c = getc (input)) != '"')
171 fatal ("new-line inside string");
184 else if (isalpha (c))
191 while (isdigit (c) || isalpha (c) || c == '.');
195 if (!strcmp (tok_string, "i8"))
197 else if (!strcmp (tok_string, "i64"))
199 else if (tok_string[0] == 's')
202 tok_integer = atoi (tok_string + 1);
204 else if (!strcmp (tok_string, "SYSMIS"))
207 tok_float = -DBL_MAX;
209 else if (!strcmp (tok_string, "LOWEST"))
212 tok_float = float_get_lowest ();
214 else if (!strcmp (tok_string, "HIGHEST"))
219 else if (!strcmp (tok_string, "ENDIAN"))
222 tok_integer = integer_format == INTEGER_MSB_FIRST ? 1 : 2;
224 else if (!strcmp (tok_string, "COUNT"))
226 else if (!strcmp (tok_string, "hex"))
229 fatal ("invalid token `%s'", tok_string);
232 fatal ("invalid input byte `%c'", c);
236 buffer_put (struct buffer *buffer, const void *data, size_t n)
238 memcpy (buffer_put_uninit (buffer, n), data, n);
242 buffer_put_uninit (struct buffer *buffer, size_t n)
245 if (buffer->size > buffer->allocated)
247 buffer->allocated = buffer->size * 2;
248 buffer->data = xrealloc (buffer->data, buffer->allocated);
250 return &buffer->data[buffer->size - n];
253 /* Returns the integer value of hex digit C. */
257 const char s[] = "0123456789abcdef";
258 const char *cp = strchr (s, c_tolower ((unsigned char) c));
268 %s, SAv Construction Kit\n\
269 usage: %s [OPTIONS] INPUT\n\
271 --be big-endian output format (default)\n\
272 --le little-endian output format\n\
273 --help print this help message and exit\n\
275 The input is a sequence of data items, each followed by a semicolon.\n\
276 Each data item is converted to the output format and written on\n\
277 stdout. A data item is one of the following\n\
279 - An integer in decimal, in hexadecimal prefixed by 0x, or in octal\n\
280 prefixed by 0. Output as a 32-bit binary integer.\n\
282 - A floating-point number. Output in 64-bit IEEE 754 format.\n\
284 - A string enclosed in double quotes. Output literally. There is\n\
285 no syntax for \"escapes\". Strings may not contain new-lines.\n\
287 - A literal of the form s<number> followed by a quoted string as\n\
288 above. Output as the string's contents followed by enough spaces\n\
289 to fill up <number> bytes. For example, s8 \"foo\" is output as\n\
290 the \"foo\" followed by 5 spaces.\n\
292 - The literal \"i8\" followed by an integer. Output as a single\n\
293 byte with the specified value.\n\
295 - The literal \"i64\" followed by an integer. Output as a 64-bit\n\
298 - One of the literals SYSMIS, LOWEST, or HIGHEST. Output as a\n\
299 64-bit IEEE 754 float of the appropriate PSPP value.\n\
301 - The literal ENDIAN. Output as a 32-bit binary integer, either\n\
302 with value 1 if --be is in effect or 2 if --le is in effect.\n\
304 - A pair of parentheses enclosing a sequence of data items, each\n\
305 followed by a semicolon (the last semicolon is optional).\n\
306 Output as the enclosed data items in sequence.\n\
308 - The literal COUNT followed by a sequence of parenthesized data\n\
309 items, as above. Output as a 32-bit binary integer whose value\n\
310 is the number of bytes enclosed within the parentheses, followed\n\
311 by the enclosed data items themselves.\n\
313 optionally followed by an asterisk and a positive integer, which\n\
314 specifies a repeat count for the data item.\n\
316 The md5sum of the data written to stdout is written to stderr as\n\
317 32 hexadecimal digits followed by a new-line.\n",
318 program_name, program_name);
323 parse_options (int argc, char **argv)
328 OPT_BE = UCHAR_MAX + 1,
332 static const struct option options[] =
334 {"be", no_argument, NULL, OPT_BE},
335 {"le", no_argument, NULL, OPT_LE},
336 {"help", no_argument, NULL, OPT_HELP},
340 int c = getopt_long (argc, argv, "", options, NULL);
347 float_format = FLOAT_IEEE_DOUBLE_BE;
348 integer_format = INTEGER_MSB_FIRST;
352 float_format = FLOAT_IEEE_DOUBLE_LE;
353 integer_format = INTEGER_LSB_FIRST;
372 if (optind + 1 != argc)
373 error (1, 0, "exactly one non-option argument required; "
374 "use --help for help");
379 parse_data_item (struct buffer *output)
381 size_t old_size = output->size;
383 if (token == T_INTEGER)
385 integer_put (tok_integer, integer_format,
386 buffer_put_uninit (output, 4), 4);
389 else if (token == T_FLOAT)
391 float_convert (FLOAT_NATIVE_DOUBLE, &tok_float,
392 float_format, buffer_put_uninit (output, 8));
395 else if (token == T_I8)
402 if (token != T_INTEGER)
403 fatal ("integer expected after `i8'");
405 buffer_put (output, &byte, 1);
408 while (token == T_INTEGER);
410 else if (token == T_I64)
415 if (token != T_INTEGER)
416 fatal ("integer expected after `i64'");
417 integer_put (tok_integer, integer_format,
418 buffer_put_uninit (output, 8), 8);
421 while (token == T_INTEGER);
423 else if (token == T_STRING)
425 buffer_put (output, tok_string, tok_strlen);
428 else if (token == T_S)
435 if (token != T_STRING)
436 fatal ("string expected");
438 fatal ("%zu-byte string is longer than pad length %d",
441 buffer_put (output, tok_string, tok_strlen);
442 memset (buffer_put_uninit (output, n - tok_strlen), ' ',
446 else if (token == T_LPAREN)
450 while (token != T_RPAREN)
451 parse_data_item (output);
455 else if (token == T_COUNT)
457 buffer_put_uninit (output, 4);
460 if (token != T_LPAREN)
461 fatal ("`(' expected after COUNT");
464 while (token != T_RPAREN)
465 parse_data_item (output);
468 integer_put (output->size - old_size - 4, integer_format,
469 output->data + old_size, 4);
471 else if (token == T_HEX)
477 if (token != T_STRING)
478 fatal ("string expected");
480 for (p = tok_string; *p; p++)
482 if (isspace ((unsigned char) *p))
484 else if (isxdigit ((unsigned char) p[0])
485 && isxdigit ((unsigned char) p[1]))
487 int high = hexit_value (p[0]);
488 int low = hexit_value (p[1]);
489 uint8_t byte = high * 16 + low;
490 buffer_put (output, &byte, 1);
494 fatal ("invalid format in hex string");
499 fatal ("syntax error");
501 if (token == T_ASTERISK)
503 size_t n = output->size - old_size;
508 if (token != T_INTEGER || tok_integer < 1)
509 fatal ("positive integer expected after `*'");
510 p = buffer_put_uninit (output, (tok_integer - 1) * n);
511 while (--tok_integer > 0)
513 memcpy (p, output->data + old_size, n);
520 if (token == T_SEMICOLON)
522 else if (token != T_RPAREN)
523 fatal ("`;' expected");
527 main (int argc, char **argv)
529 struct buffer output;
533 set_program_name (argv[0]);
534 input_file_name = parse_options (argc, argv);
536 if (!strcmp (input_file_name, "-"))
540 input = fopen (input_file_name, "r");
542 error (1, errno, "%s: open failed", input_file_name);
545 if (isatty (STDOUT_FILENO))
546 error (1, 0, "not writing binary data to a terminal; redirect to a file");
550 output.allocated = 0;
554 while (token != T_EOF)
555 parse_data_item (&output);
560 fwrite (output.data, output.size, 1, stdout);
562 md5_buffer ((const char *) output.data, output.size, digest);
563 for (i = 0; i < sizeof digest; i++)
564 fprintf (stderr, "%02x", digest[i]);