1 /* PSPP - a program for statistical analysis.
2 Copyright (C) 2010, 2011, 2013 Free Software Foundation, Inc.
4 This program is free software: you can redistribute it and/or modify
5 it under the terms of the GNU General Public License as published by
6 the Free Software Foundation, either version 3 of the License, or
7 (at your option) any later version.
9 This program is distributed in the hope that it will be useful,
10 but WITHOUT ANY WARRANTY; without even the implied warranty of
11 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 GNU General Public License for more details.
14 You should have received a copy of the GNU General Public License
15 along with this program. If not, see <http://www.gnu.org/licenses/>. */
28 #include "libpspp/assertion.h"
29 #include "libpspp/cast.h"
30 #include "libpspp/compiler.h"
31 #include "libpspp/misc.h"
32 #include "language/lexer/segment.h"
35 #include "gl/minmax.h"
36 #include "gl/progname.h"
37 #include "gl/read-file.h"
38 #include "gl/xalloc.h"
39 #include "gl/xmemdup0.h"
41 /* -a/--auto, -b/--batch, -i/--interactive: syntax mode. */
42 static enum segmenter_mode mode = SEG_MODE_AUTO;
44 /* -v, --verbose: Print row and column information. */
47 /* -1, --one-byte: Feed in one byte at a time? */
50 /* -0, --truncations: Check that every truncation of input yields a result. */
51 static bool check_truncations;
53 /* -s, --strip-trailing-newline: Strip trailing newline from last line of input. */
55 static bool strip_trailing_newline;
57 static const char *parse_options (int argc, char **argv);
58 static void usage (void) NO_RETURN;
60 static void check_segmentation (const char *input, size_t length,
64 main (int argc, char *argv[])
66 const char *file_name;
70 set_program_name (argv[0]);
71 file_name = parse_options (argc, argv);
73 setvbuf (stdout, NULL, _IONBF, 0);
75 /* Read the named file, or stdin if the file name is "-", into 'input'. Ensure
76 that 'input' ends in a new-line followed by a null byte. */
77 input = (!strcmp (file_name, "-")
78 ? fread_file (stdin, 0, &length)
79 : read_file (file_name, 0, &length));
81 error (EXIT_FAILURE, errno, "reading %s failed", file_name);
83 if (strip_trailing_newline && length && input[length - 1] == '\n')
86 if (length && input[length - 1] == '\r')
90 if (!check_truncations)
91 check_segmentation (input, length, true);
96 for (test_len = 0; test_len <= length; test_len++)
98 char *copy = xmemdup (input, test_len);
99 check_segmentation (copy, test_len, false);
109 check_segmentation (const char *input, size_t length, bool print_segments)
111 struct segmenter s = segmenter_init (mode, false);
113 size_t line_number = 1;
114 size_t line_offset = 0;
117 enum segment_type type;
120 const char *type_name, *p;
128 for (i = 0; i <= length - offset; i++)
130 /* Make a copy to ensure that segmenter_push() isn't actually looking ahead beyond the bytes passed to it. */
134 if (i > 0 && input[offset + i - 1] == '\n')
137 copy = xmemdup (input + offset, i);
138 n = segmenter_push (&s, copy, i, i + offset >= length, &type);
144 assert (n_newlines <= 2);
147 n = segmenter_push (&s, input + offset, length - offset, true, &type);
152 check_segmentation (input, length, true);
154 error (EXIT_FAILURE, 0, "segmenter_push returned -1 at offset %zu",
157 assert (offset + n <= length);
158 assert (offset <= length);
160 if (type == SEG_NEWLINE)
162 if (n == 1 ? input[offset] != '\n'
163 : n == 2 ? input[offset] != '\r' || input[offset + 1] != '\n'
165 error (EXIT_FAILURE, 0, "NEWLINE segment at offset %zu contains "
166 "non-newline content \"%.*s\"", offset, n, &input[offset]);
168 else if (memchr (&input[offset], '\n', n))
169 error (EXIT_FAILURE, 0, "%s segment \"%.*s\" contains new-line",
170 segment_type_to_string (type), n, &input[offset]);
180 if (prev_type != SEG_SPACES && prev_type != -1
181 && type == SEG_SPACES && n == 1 && input[offset] == ' ')
194 printf ("%2zu:%2zu: ", line_number, offset - line_offset);
196 type_name = segment_type_to_string (type);
197 for (p = type_name; *p != '\0'; p++)
198 putchar (tolower ((unsigned char) *p));
203 for (i = MIN (15, strlen (type_name)); i < 16; i++)
207 const uint8_t *u_input = CHAR_CAST (const uint8_t *, input);
211 mblen = u8_mbtoucr (&uc, u_input + (offset + i), n - i);
216 mblen = u8_mbtouc (&uc, u_input + (offset + i), n - i);
218 for (j = 0; j < mblen; j++)
222 printf ("%02x", input[offset + i + j]);
259 if (uc < 0x20 || uc == 0x00a0)
260 printf ("<U+%04X>", uc);
262 fwrite (input + offset + i, 1, mblen, stdout);
272 if (type == SEG_NEWLINE)
274 enum prompt_style prompt;
277 line_offset = offset;
279 prompt = segmenter_get_prompt (&s);
280 printf (" (%s)\n", prompt_style_to_string (prompt));
284 while (type != SEG_END);
291 parse_options (int argc, char **argv)
295 static const struct option options[] =
297 {"one-byte", no_argument, NULL, '1'},
298 {"truncations", no_argument, NULL, '0'},
299 {"strip-trailing-newline", no_argument, NULL, 's'},
300 {"auto", no_argument, NULL, 'a'},
301 {"batch", no_argument, NULL, 'b'},
302 {"interactive", no_argument, NULL, 'i'},
303 {"verbose", no_argument, NULL, 'v'},
304 {"help", no_argument, NULL, 'h'},
308 int c = getopt_long (argc, argv, "01abivhs", options, NULL);
319 check_truncations = true;
323 strip_trailing_newline = true;
327 mode = SEG_MODE_AUTO;
331 mode = SEG_MODE_BATCH;
335 mode = SEG_MODE_INTERACTIVE;
358 if (optind + 1 != argc)
359 error (1, 0, "exactly one non-option argument required; "
360 "use --help for help");
368 %s, to test breaking PSPP syntax into lexical segments\n\
369 usage: %s [OPTIONS] INPUT\n\
372 -1, --one-byte feed one byte at a time\n\
373 -0, --truncations check null truncation of each prefix of input\n\
374 -s, --strip-trailing-newline remove newline from end of input\n\
375 -a, --auto use \"auto\" syntax mode\n\
376 -b, --batch use \"batch\" syntax mode\n\
377 -i, --interactive use \"interactive\" syntax mode (default)\n\
378 -v, --verbose include rows and column numbers in output\n\
379 -h, --help print this help message\n",
380 program_name, program_name);