1 /* PSPP - a program for statistical analysis.
2 Copyright (C) 2011, 2013, 2014 Free Software Foundation, Inc.
4 This program is free software: you can redistribute it and/or modify
5 it under the terms of the GNU General Public License as published by
6 the Free Software Foundation, either version 3 of the License, or
7 (at your option) any later version.
9 This program is distributed in the hope that it will be useful,
10 but WITHOUT ANY WARRANTY; without even the implied warranty of
11 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 GNU General Public License for more details.
14 You should have received a copy of the GNU General Public License
15 along with this program. If not, see <http://www.gnu.org/licenses/>. */
29 #include "libpspp/assertion.h"
30 #include "libpspp/compiler.h"
31 #include "libpspp/float-format.h"
32 #include "libpspp/hash-functions.h"
33 #include "libpspp/hmap.h"
34 #include "libpspp/integer-format.h"
36 #include "gl/c-ctype.h"
38 #include "gl/intprops.h"
39 #include "gl/progname.h"
40 #include "gl/xalloc.h"
49 static void buffer_put (struct buffer *, const void *, size_t);
50 static void *buffer_put_uninit (struct buffer *, size_t);
76 static enum token_type token;
77 static unsigned long long int tok_integer;
78 static double tok_float;
79 static char *tok_string;
80 static size_t tok_strlen, tok_allocated;
85 struct hmap_node hmap_node;
90 static struct hmap symbol_table = HMAP_INITIALIZER (symbol_table);
92 /* --be, --le: Integer and floating-point formats. */
93 static enum float_format float_format = FLOAT_IEEE_DOUBLE_BE;
94 static enum integer_format integer_format = INTEGER_MSB_FIRST;
96 /* Input file and current position. */
98 static const char *input_file_name;
99 static int line_number;
/* Writes a diagnostic to stderr: the input file name and current line
   number as a "FILE:LINE: " prefix, followed by MESSAGE formatted
   printf-style with the remaining arguments.

   NOTE(review): the statements after the vfprintf call are not visible
   here; judging by the name and by how callers use it, this presumably
   ends the program -- confirm. */
101 static void PRINTF_FORMAT (1, 2)
102 fatal (const char *message, ...)
106 fprintf (stderr, "%s:%d: ", input_file_name, line_number);
107 va_start (args, message);
108 vfprintf (stderr, message, args);
118 if (tok_strlen >= tok_allocated)
119 tok_string = x2realloc (tok_string, &tok_allocated);
121 tok_string[tok_strlen] = c;
141 while ((c = getc (input)) != '\n' && c != EOF)
147 while (isspace (c) || c == '<' || c == '>');
153 fatal ("unexpected end of input");
156 else if (isdigit (c) || c == '-')
163 while (isdigit (c) || isalpha (c) || c == '.');
167 if (!strcmp (tok_string, "-"))
174 if (strchr (tok_string, '.') == NULL)
177 tok_integer = strtoull (tok_string, &tail, 0);
182 tok_float = strtod (tok_string, &tail);
185 fatal ("invalid numeric syntax \"%s\"", tok_string);
191 while ((c = getc (input)) != '"')
194 fatal ("new-line inside string");
209 else if (isalpha (c) || c == '@' || c == '_')
216 while (isdigit (c) || isalpha (c) || c == '.' || c == '_');
225 if (tok_string[0] == '@')
231 if (!strcmp (tok_string, "i8"))
233 else if (!strcmp (tok_string, "i16"))
235 else if (!strcmp (tok_string, "i64"))
237 else if (tok_string[0] == 's')
240 tok_integer = atoi (tok_string + 1);
242 else if (!strcmp (tok_string, "SYSMIS"))
245 tok_float = -DBL_MAX;
247 else if (!strcmp (tok_string, "PCSYSMIS"))
249 else if (!strcmp (tok_string, "LOWEST"))
252 tok_float = float_get_lowest ();
254 else if (!strcmp (tok_string, "HIGHEST"))
259 else if (!strcmp (tok_string, "ENDIAN"))
262 tok_integer = integer_format == INTEGER_MSB_FIRST ? 1 : 2;
264 else if (!strcmp (tok_string, "COUNT"))
266 else if (!strcmp (tok_string, "COUNT8"))
268 else if (!strcmp (tok_string, "hex"))
271 fatal ("invalid token `%s'", tok_string);
274 fatal ("invalid input byte `%c'", c);
278 buffer_put (struct buffer *buffer, const void *data, size_t n)
/* Copies the N bytes at DATA into the region of BUFFER that
   buffer_put_uninit returns for a request of N bytes. */
280 memcpy (buffer_put_uninit (buffer, n), data, n);
284 buffer_put_uninit (struct buffer *buffer, size_t n)
/* Returns a pointer to the last N bytes of BUFFER's data, enlarging the
   allocation first if the buffer's size exceeds it.  The returned bytes
   are not initialized here; the caller fills them in.

   NOTE(review): the statement that adds N to buffer->size is not visible
   here -- presumably it runs before the check below; confirm. */
287 if (buffer->size > buffer->allocated)
/* Allocate twice the size currently needed. */
289 buffer->allocated = buffer->size * 2;
290 buffer->data = xrealloc (buffer->data, buffer->allocated);
/* The requested region is the final N bytes of the buffer. */
292 return &buffer->data[buffer->size - n];
295 /* Returns the integer value of hex digit C. */
299 const char s[] = "0123456789abcdef";
300 const char *cp = strchr (s, c_tolower ((unsigned char) c));
310 %s, SAv Construction Kit\n\
311 usage: %s [OPTIONS] INPUT\n\
313 --be big-endian output format (default)\n\
314 --le little-endian output format\n\
315 --help print this help message and exit\n\
317 The input is a sequence of data items, each followed by a semicolon.\n\
318 Each data item is converted to the output format and written on\n\
319 stdout. A data item is one of the following\n\
321 - An integer in decimal, in hexadecimal prefixed by 0x, or in octal\n\
322 prefixed by 0. Output as a 32-bit binary integer.\n\
324 - A floating-point number. Output in 64-bit IEEE 754 format.\n\
326 - A string enclosed in double quotes. Output literally. There is\n\
327 no syntax for \"escapes\". Strings may not contain new-lines.\n\
329 - A literal of the form s<number> followed by a quoted string as\n\
330 above. Output as the string's contents followed by enough spaces\n\
331 to fill up <number> bytes. For example, s8 \"foo\" is output as\n\
332 the \"foo\" followed by 5 spaces.\n\
334 - The literal \"i8\", \"i16\", or \"i64\" followed by an integer. Output\n\
335 as a binary integer with the specified number of bits.\n\
337 - One of the literals SYSMIS, LOWEST, or HIGHEST. Output as a\n\
338 64-bit IEEE 754 float of the appropriate PSPP value.\n\
340 - PCSYSMIS. Output as SPSS/PC+ system-missing value.\n\
342 - The literal ENDIAN. Output as a 32-bit binary integer, either\n\
343 with value 1 if --be is in effect or 2 if --le is in effect.\n\
345 - A pair of parentheses enclosing a sequence of data items, each\n\
346 followed by a semicolon (the last semicolon is optional).\n\
347 Output as the enclosed data items in sequence.\n\
349 - The literal COUNT or COUNT8 followed by a sequence of parenthesized\n\
350 data items, as above. Output as a 32-bit or 8-bit binary integer whose\n\
351 value is the number of bytes enclosed within the parentheses, followed\n\
352 by the enclosed data items themselves.\n\
354 optionally followed by an asterisk and a positive integer, which\n\
355 specifies a repeat count for the data item.\n",
356 program_name, program_name);
361 parse_options (int argc, char **argv)
/* Processes the command line with getopt_long, recognizing the long
   options --be, --le, and --help, and insists on exactly one non-option
   argument.  main stores this function's return value in
   input_file_name. */
/* Long-option codes start above UCHAR_MAX, presumably so that they can
   never equal a single-character option code. */
366 OPT_BE = UCHAR_MAX + 1,
370 static const struct option options[] =
372 {"be", no_argument, NULL, OPT_BE},
373 {"le", no_argument, NULL, OPT_LE},
374 {"help", no_argument, NULL, OPT_HELP},
/* The short-option string is empty, so only long options are accepted. */
378 int c = getopt_long (argc, argv, "", options, NULL);
/* Big-endian float and integer formats (presumably the OPT_BE case; the
   case label is not visible here). */
385 float_format = FLOAT_IEEE_DOUBLE_BE;
386 integer_format = INTEGER_MSB_FIRST;
/* Little-endian float and integer formats (presumably the OPT_LE
   case). */
390 float_format = FLOAT_IEEE_DOUBLE_LE;
391 integer_format = INTEGER_LSB_FIRST;
/* optind + 1 == argc exactly when one argument is left after the
   options. */
410 if (optind + 1 != argc)
411 error (1, 0, "exactly one non-option argument required; "
412 "use --help for help");
/* Looks for a symbol called NAME in symbol_table.  If none is found, a
   new symbol is allocated, inserted into the table, given a copy of
   NAME, and marked with an offset of UINT_MAX.

   NOTE(review): the return statements are not visible here; presumably
   a match is returned from inside the loop and the new symbol is
   returned at the end -- confirm. */
416 static struct symbol *
417 symbol_find (const char *name)
419 struct symbol *symbol;
424 hash = hash_string (name, 0);
/* Only entries with the same hash are visited; the name comparison
   rejects entries that merely share the hash value. */
425 HMAP_FOR_EACH_WITH_HASH (symbol, struct symbol, hmap_node,
427 if (!strcmp (name, symbol->name))
/* Create a new entry.  UINT_MAX is the "offset not yet defined" marker
   that parse_data_item and main test for. */
430 symbol = xmalloc (sizeof *symbol);
431 hmap_insert (&symbol_table, &symbol->hmap_node, hash);
432 symbol->name = xstrdup (name);
433 symbol->offset = UINT_MAX;
438 parse_data_item (struct buffer *output)
/* Parses one data item, starting at the current token, and appends its
   binary form to OUTPUT.  The item's kind is selected by the type of the
   current token; each branch below handles one kind.  An item may be
   followed by a `*' repeat count and is terminated by `;' (or by the `)'
   that closes an enclosing group). */
/* Remember where this item's output begins: COUNT and COUNT8 patch their
   length field there, and the `*' suffix copies from there. */
440 size_t old_size = output->size;
/* Integer: 4 bytes in the selected integer format. */
442 if (token == T_INTEGER)
444 integer_put (tok_integer, integer_format,
445 buffer_put_uninit (output, 4), 4);
/* Floating-point number: converted from the native double
   representation to 8 bytes in the selected float format. */
448 else if (token == T_FLOAT)
450 float_convert (FLOAT_NATIVE_DOUBLE, &tok_float,
451 float_format, buffer_put_uninit (output, 8));
/* PCSYSMIS: a fixed 8-byte pattern, copied as-is without consulting
   float_format or integer_format. */
454 else if (token == T_PCSYSMIS)
456 static const uint8_t pcsysmis[] =
457 { 0xf5, 0x1e, 0x26, 0x02, 0x8a, 0x8c, 0xed, 0xff, };
458 buffer_put (output, pcsysmis, sizeof pcsysmis);
/* i8: at least one integer must follow; one byte is output per integer
   for as long as integer tokens keep coming. */
461 else if (token == T_I8)
468 if (token != T_INTEGER)
469 fatal ("integer expected after `i8'");
471 buffer_put (output, &byte, 1);
474 while (token == T_INTEGER);
/* i16: as for i8, but each integer is output as 2 bytes. */
476 else if (token == T_I16)
481 if (token != T_INTEGER)
482 fatal ("integer expected after `i16'");
483 integer_put (tok_integer, integer_format,
484 buffer_put_uninit (output, 2), 2);
487 while (token == T_INTEGER);
/* i64: as for i8, but each integer is output as 8 bytes. */
489 else if (token == T_I64)
494 if (token != T_INTEGER)
495 fatal ("integer expected after `i64'");
496 integer_put (tok_integer, integer_format,
497 buffer_put_uninit (output, 8), 8);
500 while (token == T_INTEGER);
/* Quoted string: its tok_strlen bytes are output unchanged. */
502 else if (token == T_STRING)
504 buffer_put (output, tok_string, tok_strlen);
/* s<number>: a string must follow; it is output and then padded with
   spaces.  A string longer than the pad length is an error. */
507 else if (token == T_S)
514 if (token != T_STRING)
515 fatal ("string expected");
517 fatal ("%zu-byte string is longer than pad length %d",
520 buffer_put (output, tok_string, tok_strlen);
521 memset (buffer_put_uninit (output, n - tok_strlen), ' ',
/* Parenthesized group: nested items are parsed recursively until the
   closing parenthesis. */
525 else if (token == T_LPAREN)
529 while (token != T_RPAREN)
530 parse_data_item (output);
/* COUNT: reserve 4 bytes for the length, parse the parenthesized items,
   then go back and fill in the reserved bytes. */
534 else if (token == T_COUNT)
536 buffer_put_uninit (output, 4);
539 if (token != T_LPAREN)
540 fatal ("`(' expected after COUNT");
543 while (token != T_RPAREN)
544 parse_data_item (output);
/* The stored length excludes the 4-byte length field itself. */
547 integer_put (output->size - old_size - 4, integer_format,
548 output->data + old_size, 4);
/* COUNT8: same as COUNT, with a 1-byte length field. */
550 else if (token == T_COUNT8)
552 buffer_put_uninit (output, 1);
555 if (token != T_LPAREN)
556 fatal ("`(' expected after COUNT8");
559 while (token != T_RPAREN)
560 parse_data_item (output);
/* The stored length excludes the 1-byte length field itself. */
563 integer_put (output->size - old_size - 1, integer_format,
564 output->data + old_size, 1);
/* hex: a string must follow.  Each pair of adjacent hex digits in it
   becomes one output byte; white space is tested for separately
   (presumably skipped -- the statement is not visible here), and
   anything else is an error. */
566 else if (token == T_HEX)
572 if (token != T_STRING)
573 fatal ("string expected");
575 for (p = tok_string; *p; p++)
577 if (isspace ((unsigned char) *p))
579 else if (isxdigit ((unsigned char) p[0])
580 && isxdigit ((unsigned char) p[1]))
/* The first digit is the high nibble, the second the low nibble. */
582 int high = hexit_value (p[0]);
583 int low = hexit_value (p[1]);
584 uint8_t byte = high * 16 + low;
585 buffer_put (output, &byte, 1);
589 fatal ("invalid format in hex string");
/* Label definition: the first time a label is seen its offset becomes
   the current output size.  Seeing it again at the same offset is
   accepted (presumably so main's second pass over the same input does
   not fail); a different offset is an error. */
593 else if (token == T_LABEL)
595 struct symbol *sym = symbol_find (tok_string);
596 if (sym->offset == UINT_MAX)
597 sym->offset = output->size;
598 else if (sym->offset != output->size)
599 fatal ("%s: can't redefine label for offset %u with offset %zu",
600 tok_string, sym->offset, output->size);
/* @label reference: starts from the label's offset, accepts any number
   of `+' or `-' operands (each another @label or an integer), and
   outputs the result as a 4-byte integer. */
604 else if (token == T_AT)
606 unsigned int value = symbol_find (tok_string)->offset;
609 while (token == T_MINUS || token == T_PLUS)
611 enum token_type op = token;
612 unsigned int operand;
615 operand = symbol_find (tok_string)->offset;
616 else if (token == T_INTEGER)
617 operand = tok_integer;
619 fatal ("expecting @label");
627 integer_put (value, integer_format, buffer_put_uninit (output, 4), 4);
630 fatal ("syntax error");
/* Optional `* N' suffix: the N bytes-worth of output produced by this
   item is duplicated so that it appears tok_integer times in total. */
632 if (token == T_ASTERISK)
/* Number of bytes this item produced. */
634 size_t n = output->size - old_size;
639 if (token != T_INTEGER || tok_integer < 1)
640 fatal ("positive integer expected after `*'");
/* Reserve room for the extra copies in one step.
   NOTE(review): this multiplication is not checked for overflow in the
   visible code. */
641 p = buffer_put_uninit (output, (tok_integer - 1) * n);
642 while (--tok_integer > 0)
644 memcpy (p, output->data + old_size, n);
/* An item ends with `;'.  A `)' is also accepted here, which lets the
   last item of a group omit its semicolon. */
651 if (token == T_SEMICOLON)
653 else if (token != T_RPAREN)
654 fatal ("`;' expected");
658 main (int argc, char **argv)
/* Program entry point: parses the input named on the command line into
   an in-memory buffer, item by item, and writes the buffer to stdout. */
660 struct buffer output;
662 set_program_name (argv[0]);
663 input_file_name = parse_options (argc, argv);
/* A file name of "-" is handled specially (presumably as standard
   input; the assignment is not visible here).  Any other name is opened
   for reading. */
665 if (!strcmp (input_file_name, "-"))
669 input = fopen (input_file_name, "r");
671 error (1, errno, "%s: open failed", input_file_name);
/* The output is binary, so refuse to send it to a terminal. */
674 if (isatty (STDOUT_FILENO))
675 error (1, 0, "not writing binary data to a terminal; redirect to a file");
679 output.allocated = 0;
/* First pass: parse data items until end of input. */
683 while (token != T_EOF)
684 parse_data_item (&output);
/* A non-empty symbol table triggers a second pass over the input,
   presumably so that references to labels defined later in the input
   pick up their real offsets. */
686 if (!hmap_is_empty (&symbol_table))
688 struct symbol *symbol;
/* After the first pass every label that was mentioned must have been
   given an offset. */
690 HMAP_FOR_EACH (symbol, struct symbol, hmap_node, &symbol_table)
691 if (symbol->offset == UINT_MAX)
692 error (1, 0, "label %s used but never defined", symbol->name);
/* Rewind the input for the second pass.
   NOTE(review): the message says "stdin" although INPUT may be a file
   opened by name above, and errno is not reported. */
695 if (fseek (input, 0, SEEK_SET) != 0)
696 error (1, 0, "failed to rewind stdin for second pass");
/* Second pass: parse the whole input again. */
700 while (token != T_EOF)
701 parse_data_item (&output);
/* Write the whole buffer as a single item.
   NOTE(review): the result of fwrite is not checked on this line, so a
   failed or short write would go unreported unless it is checked on
   lines not visible here. */
707 fwrite (output.data, output.size, 1, stdout);