/* PSPP - a program for statistical analysis.
- Copyright (C) 2010, 2011 Free Software Foundation, Inc.
+ Copyright (C) 2010, 2011, 2013 Free Software Foundation, Inc.
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
#include "gl/progname.h"
#include "gl/read-file.h"
#include "gl/xalloc.h"
+#include "gl/xmemdup0.h"
/* -a/--auto, -b/--batch, -i/--interactive: syntax mode, passed to
   segmenter_init() by check_segmentation().
   NOTE(review): the initial value is SEG_MODE_AUTO, but the usage text marks
   --interactive as "(default)" -- confirm which one is intended. */
static enum segmenter_mode mode = SEG_MODE_AUTO;
/* -1, --one-byte: Feed in one byte at a time? */
static bool one_byte;
+/* -0, --truncations: Run the segmentation check on every prefix (truncation)
+ of the input, from the empty prefix up to the whole input, without
+ printing the segments. */
+static bool check_truncations;
+
+/* -s, --strip-trailing-newline: Strip one trailing new-line ("\n" or "\r\n")
+ from the end of the input before it is segmented. */
+static bool strip_trailing_newline;
+
/* Forward declarations.  main() uses the string returned by parse_options()
   as the name of the input file, with "-" meaning stdin. */
static const char *parse_options (int argc, char **argv);
static void usage (void) NO_RETURN;
+static void check_segmentation (const char *input, size_t length,
+ bool print_segments);
+
/* Entry point.  Parses the command line, reads the whole input into memory,
   optionally strips one trailing new-line, and then runs check_segmentation()
   either once on the full input (printing the segments) or, with
   --truncations, once on every prefix of the input (without printing).
   Returns 0 unless one of the checks exits the program with an error. */
int
main (int argc, char *argv[])
{
- size_t offset, line_number, line_offset;
const char *file_name;
- char *input;
- struct segmenter s;
size_t length;
- int prev_type;
+ char *input;
set_program_name (argv[0]);
file_name = parse_options (argc, argv);
/* Make stdout unbuffered. */
+ setvbuf (stdout, NULL, _IONBF, 0);
+
/* Read all of the input into 'input': from stdin if FILE_NAME is "-",
   otherwise from the file named FILE_NAME.  'length' receives its size. */
input = (!strcmp (file_name, "-")
- ? fread_file (stdin, &length)
- : read_file (file_name, &length));
+ ? fread_file (stdin, 0, &length)
+ : read_file (file_name, 0, &length));
if (input == NULL)
error (EXIT_FAILURE, errno, "reading %s failed", file_name);
- input = xrealloc (input, length + 3);
- if (length == 0 || input[length - 1] != '\n')
- input[length++] = '\n';
- input[length++] = '\0';
- segmenter_init (&s, mode);
/* With --strip-trailing-newline, drop a final "\n", and also the "\r" just
   before it if there is one, by shrinking 'length'; the buffer itself is not
   modified. */
+ if (strip_trailing_newline && length && input[length - 1] == '\n')
+ {
+ length--;
+ if (length && input[length - 1] == '\r')
+ length--;
+ }
- line_number = 1;
- line_offset = 0;
- prev_type = -1;
- for (offset = 0; offset < length; )
/* Normal mode: segment the whole input once and print the segments. */
+ if (!check_truncations)
+ check_segmentation (input, length, true);
+ else
+ {
+ size_t test_len;
+
/* Truncation mode: check every prefix length from 0 through 'length'
   inclusive.  Each prefix is checked in its own exactly-sized copy --
   presumably so that a read past the truncation point runs off the end of
   an allocation instead of silently seeing the rest of the input (TODO
   confirm intent). */
+ for (test_len = 0; test_len <= length; test_len++)
+ {
+ char *copy = xmemdup (input, test_len);
+ check_segmentation (copy, test_len, false);
+ free (copy);
+ }
+ }
+ free (input);
+
+ return 0;
+}
+
/* Runs a segmenter, initialized with the global syntax 'mode', over the LENGTH
   bytes at INPUT, one segment at a time until a SEG_END segment is produced,
   and checks each segment for consistency: the segment must lie within the
   input, a checked SEG_NEWLINE segment must be "\n" or "\r\n", and any other
   segment must not contain a new-line.  A failed check exits the program with
   an error message.

   If PRINT_SEGMENTS is true, each segment is also written to stdout, followed
   by a final new-line; if it is false, nothing is printed unless
   segmenter_push() fails. */
+static void
+check_segmentation (const char *input, size_t length, bool print_segments)
+{
+ struct segmenter s = segmenter_init (mode, false);
+
+ size_t line_number = 1;
+ size_t line_offset = 0;
+ int prev_type = -1;
+ size_t offset = 0;
+ enum segment_type type;
+ do
{
- enum segment_type type;
const char *type_name, *p;
int n;
n_newlines++;
copy = xmemdup (input + offset, i);
/* The fourth argument is true once the bytes offered so far reach the end of
   the input (presumably an end-of-input flag -- confirm against
   segmenter_push()). */
- n = segmenter_push (&s, copy, i, &type);
+ n = segmenter_push (&s, copy, i, i + offset >= length, &type);
free (copy);
if (n >= 0)
assert (n_newlines <= 2);
}
else
/* Offer all of the remaining input at once, flagged as reaching the end. */
- n = segmenter_push (&s, input + offset, length - offset, &type);
+ n = segmenter_push (&s, input + offset, length - offset, true, &type);
if (n < 0)
- error (EXIT_FAILURE, 0, "segmenter_push returned -1 at offset %zu",
- offset);
/* When running quietly, repeat the whole check with printing enabled --
   presumably so that the segments leading up to the failure are shown and the
   repeated run reports the error itself (TODO confirm).  When already
   printing, report the failure and exit. */
+ {
+ if (!print_segments)
+ check_segmentation (input, length, true);
+ else
+ error (EXIT_FAILURE, 0, "segmenter_push returned -1 at offset %zu",
+ offset);
+ }
assert (offset + n <= length);
if (type == SEG_NEWLINE)
- assert ((n == 1 && input[offset] == '\n')
- || (n == 2
- && input[offset] == '\r' && input[offset + 1] == '\n'));
- else
- assert (memchr (&input[offset], '\n', n) == NULL);
/* A 1-byte NEWLINE segment must be "\n" and a 2-byte one must be "\r\n".
   NOTE(review): NEWLINE segments of any other length are not checked here,
   whereas the removed assertion rejected them -- confirm this is intended. */
+ {
+ if (n == 1 ? input[offset] != '\n'
+ : n == 2 ? input[offset] != '\r' || input[offset + 1] != '\n'
+ : false)
+ error (EXIT_FAILURE, 0, "NEWLINE segment at offset %zu contains "
+ "non-newline content \"%.*s\"", offset, n, &input[offset]);
+ }
/* Segments of every other type must not contain a new-line. */
+ else if (memchr (&input[offset], '\n', n))
+ error (EXIT_FAILURE, 0, "%s segment \"%.*s\" contains new-line",
+ segment_type_to_string (type), n, &input[offset]);
+
/* Quiet mode: skip all of the printing below, advance past this segment, and
   go straight to the loop condition. */
+ if (!print_segments)
+ {
+ offset += n;
+ continue;
+ }
if (!verbose)
{
for (i = MIN (15, strlen (type_name)); i < 16; i++)
putchar (' ');
- for (i = 0; i < n; )
+ for (i = 0; i < n;)
{
const uint8_t *u_input = CHAR_CAST (const uint8_t *, input);
ucs4_t uc;
prompt = segmenter_get_prompt (&s);
printf (" (%s)\n", prompt_style_to_string (prompt));
}
+ fflush (stdout);
}
- putchar ('\n');
-
- free (input);
/* Stop once the segmenter has produced its SEG_END segment. */
+ while (type != SEG_END);
- return 0;
/* Finish the printed output with a new-line; quiet runs print nothing. */
+ if (print_segments)
+ putchar ('\n');
}
static const char *
static const struct option options[] =
{
{"one-byte", no_argument, NULL, '1'},
+ {"truncations", no_argument, NULL, '0'},
+ {"strip-trailing-newline", no_argument, NULL, 's'},
{"auto", no_argument, NULL, 'a'},
{"batch", no_argument, NULL, 'b'},
{"interactive", no_argument, NULL, 'i'},
{NULL, 0, NULL, 0},
};
- int c = getopt_long (argc, argv, "1abivh", options, NULL);
+ int c = getopt_long (argc, argv, "01abivhs", options, NULL);
if (c == -1)
break;
one_byte = true;
break;
+ case '0':
+ check_truncations = true;
+ break;
+
+ case 's':
+ strip_trailing_newline = true;
+ break;
+
case 'a':
mode = SEG_MODE_AUTO;
break;
\n\
Options:\n\
-1, --one-byte feed one byte at a time\n\
+ -0, --truncations check null truncation of each prefix of input\n\
+ -s, --strip-trailing-newline remove newline from end of input\n\
-a, --auto use \"auto\" syntax mode\n\
-b, --batch use \"batch\" syntax mode\n\
-i, --interactive use \"interactive\" syntax mode (default)\n\