/* PSPP - a program for statistical analysis.
- Copyright (C) 2010, 2011 Free Software Foundation, Inc.
+ Copyright (C) 2010, 2011, 2013 Free Software Foundation, Inc.
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
#include "gl/c-ctype.h"
#include "gl/c-strcase.h"
+#include "gl/memchr2.h"
enum segmenter_state
{
int ofs;
for (ofs = 2; ofs < n; ofs++)
- if (input[ofs] == '\n')
+ if (input[ofs] == '\n' || input[ofs] == '\0')
{
- if (input[ofs - 1] == '\r')
+ if (input[ofs] == '\n' && input[ofs - 1] == '\r')
ofs--;
s->state = S_GENERAL;
{
for (; ofs < n; ofs++)
{
- if (input[ofs] == '\n')
+ if (input[ofs] == '\n' || input[ofs] == '\0')
return ofs;
else if (input[ofs] == '*')
{
static int
is_end_of_line (const char *input, size_t n, int ofs)
{
- if (input[ofs] == '\n')
+ if (input[ofs] == '\n' || input[ofs] == '\0')
return 1;
else if (input[ofs] == '\r')
{
return is_end_of_line (input, n, ofs);
}
-
static int
segmenter_parse_newline__ (const char *input, size_t n,
enum segment_type *type)
if (mblen < 0)
return -1;
- if (!lex_uc_is_space (uc) || uc == '\n')
+ if (!lex_uc_is_space (uc) || uc == '\n' || uc == '\0')
return ofs;
ofs += mblen;
case '\n':
if (ofs > 1 && input[ofs - 1] == '\r')
ofs--;
-
- if (endcmd == -2)
+ /* Fall through. */
+ case '\0':
+ if (endcmd == -2 || uc == '\0')
{
/* Blank line ends comment command. */
s->state = S_GENERAL;
s->state = end_cmd ? S_DOCUMENT_3 : S_DOCUMENT_2;
return ofs;
+ case '\0':
+ *type = SEG_DOCUMENT;
+ s->state = S_DOCUMENT_3;
+ return ofs;
+
default:
if (!lex_uc_is_space (uc))
end_cmd = false;
if (mblen < 0)
return -1;
- if (uc == '\n'
+ if (uc == '\n' || uc == '\0'
|| !(lex_uc_is_space (uc) || lex_uc_is_idn (uc) || uc == '-'))
break;
segmenter_parse_full_line__ (const char *input, size_t n,
enum segment_type *type)
{
- const char *newline = memchr (input, '\n', n);
+ const char *newline = memchr2 (input, '\n', '\0', n);
if (newline == NULL)
return -1;
else
{
int ofs = newline - input;
- if (ofs == 0 || (ofs == 1 && input[0] == '\r'))
+ if (*newline == '\0')
+ {
+ assert (ofs > 0);
+ return ofs;
+ }
+ else if (ofs == 0 || (ofs == 1 && input[0] == '\r'))
{
*type = SEG_NEWLINE;
return ofs + 1;
switch (uc)
{
case '\n':
+ case '\0':
s->state = S_GENERAL;
s->substate = 0;
*type = SEG_UNQUOTED_STRING;
/* PSPP - a program for statistical analysis.
- Copyright (C) 2010, 2011 Free Software Foundation, Inc.
+ Copyright (C) 2010, 2011, 2013 Free Software Foundation, Inc.
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
#include "gl/progname.h"
#include "gl/read-file.h"
#include "gl/xalloc.h"
+#include "gl/xmemdup0.h"
/* -a/--auto, -b/--batch, -i/--interactive: syntax mode. */
static enum segmenter_mode mode = SEG_MODE_AUTO;
/* -1, --one-byte: Feed in one byte at a time? */
static bool one_byte;
+/* -0, --truncations: Check that every truncation of input yields a result. */
+static bool check_truncations;
+
static const char *parse_options (int argc, char **argv);
static void usage (void) NO_RETURN;
+static void check_segmentation (const char *input, size_t length,
+ bool print_segments);
+
int
main (int argc, char *argv[])
{
- size_t offset, line_number, line_offset;
const char *file_name;
- char *input;
- struct segmenter s;
size_t length;
- int prev_type;
+ char *input;
set_program_name (argv[0]);
file_name = parse_options (argc, argv);
: read_file (file_name, &length));
if (input == NULL)
error (EXIT_FAILURE, errno, "reading %s failed", file_name);
- input = xrealloc (input, length + 3);
- if (length == 0 || input[length - 1] != '\n')
- input[length++] = '\n';
- input[length++] = '\0';
+
+ if (!check_truncations)
+ {
+ input = xrealloc (input, length + 3);
+ if (length == 0 || input[length - 1] != '\n')
+ input[length++] = '\n';
+ input[length++] = '\0';
+
+ check_segmentation (input, length, true);
+ }
+ else
+ {
+ size_t test_len;
+
+ for (test_len = 0; test_len <= length; test_len++)
+ {
+ char *copy = xmemdup0 (input, test_len);
+ check_segmentation (copy, test_len + 1, false);
+ free (copy);
+ }
+ }
+ free (input);
+
+ return 0;
+}
+
+static void
+check_segmentation (const char *input, size_t length, bool print_segments)
+{
+ size_t offset, line_number, line_offset;
+ struct segmenter s;
+ int prev_type;
segmenter_init (&s, mode);
else
assert (memchr (&input[offset], '\n', n) == NULL);
+ if (!print_segments)
+ {
+ offset += n;
+ continue;
+ }
+
if (!verbose)
{
if (prev_type != SEG_SPACES && prev_type != -1
printf (" (%s)\n", prompt_style_to_string (prompt));
}
}
- putchar ('\n');
-
- free (input);
- return 0;
+ if (print_segments)
+ putchar ('\n');
}
static const char *
static const struct option options[] =
{
{"one-byte", no_argument, NULL, '1'},
+ {"truncations", no_argument, NULL, '0'},
{"auto", no_argument, NULL, 'a'},
{"batch", no_argument, NULL, 'b'},
{"interactive", no_argument, NULL, 'i'},
{NULL, 0, NULL, 0},
};
- int c = getopt_long (argc, argv, "1abivh", options, NULL);
+ int c = getopt_long (argc, argv, "01abivh", options, NULL);
if (c == -1)
break;
one_byte = true;
break;
+ case '0':
+ check_truncations = true;
+ break;
+
case 'a':
mode = SEG_MODE_AUTO;
break;
\n\
Options:\n\
-1, --one-byte feed one byte at a time\n\
+ -0, --truncations check null truncation of each prefix of input\n\
-a, --auto use \"auto\" syntax mode\n\
-b, --batch use \"batch\" syntax mode\n\
-i, --interactive use \"interactive\" syntax mode (default)\n\