#include "libpspp/str.h"
#include "libpspp/u8-istream.h"
#include "output/journal.h"
-#include "output/text-item.h"
+#include "output/output-item.h"
#include "gl/c-ctype.h"
#include "gl/minmax.h"
const struct lex_reader_class *class)
{
reader->class = class;
- reader->syntax = LEX_SYNTAX_AUTO;
+ reader->syntax = SEG_MODE_AUTO;
reader->error = LEX_ERROR_CONTINUE;
reader->file_name = NULL;
reader->encoding = NULL;
lex_reader_set_file_name (struct lex_reader *reader, const char *file_name)
{
free (reader->file_name);
- reader->file_name = file_name != NULL ? xstrdup (file_name) : NULL;
+ reader->file_name = xstrdup_if_nonnull (file_name);
}
\f
/* Creates and returns a new lexer. */
va_end (args);
}
-/* Prints a syntax error message saying that OPTION0 or one of the other
- strings following it, up to the first NULL, is expected. */
+/* Prints a syntax error message saying that one of the strings provided as
+ varargs, up to the first NULL, is expected.
+
+ The function name is parenthesized in the definition so that a function-like
+ macro with the same name, if one is in scope, is not expanded here.
+ NOTE(review): presumably such a macro supplies the terminating NULL on
+ behalf of callers -- confirm against the header.
+
+ This is a thin wrapper: it starts a va_list after LEXER and hands it to
+ lex_error_expecting_valist(), which does the actual work. */
void
-lex_error_expecting (struct lexer *lexer, const char *option0, ...)
+(lex_error_expecting) (struct lexer *lexer, ...)
{
- enum { MAX_OPTIONS = 8 };
- const char *options[MAX_OPTIONS + 1];
va_list args;
- int n;
- va_start (args, option0);
- options[0] = option0;
- n = 0;
- while (n + 1 < MAX_OPTIONS && options[n] != NULL)
- options[++n] = va_arg (args, const char *);
+ va_start (args, lexer);
+ lex_error_expecting_valist (lexer, args);
va_end (args);
+}
+
+/* Reports a syntax error stating that one of the strings supplied in ARGS was
+   expected.  ARGS is read as a sequence of "const char *" values that ends at
+   the first null pointer.  At most MAX_OPTIONS strings are collected; once
+   that many have been read, no further arguments are fetched.  The collected
+   strings are passed on to lex_error_expecting_array(). */
+void
+lex_error_expecting_valist (struct lexer *lexer, va_list args)
+{
+  enum { MAX_OPTIONS = 9 };
+  const char *options[MAX_OPTIONS];
+  int n_options;
+
+  for (n_options = 0; n_options < MAX_OPTIONS; n_options++)
+    {
+      const char *s = va_arg (args, const char *);
+      if (s == NULL)
+        break;
+      options[n_options] = s;
+    }
+
+  lex_error_expecting_array (lexer, options, n_options);
+}
+
+void
+lex_error_expecting_array (struct lexer *lexer, const char **options, size_t n)
+{
switch (n)
{
case 0:
break;
default:
- NOT_REACHED ();
+ lex_error (lexer, NULL);
}
}
return true;
else
{
- lex_error_expecting (lexer, identifier, NULL_SENTINEL);
+ lex_error_expecting (lexer, identifier);
return false;
}
}
if (type_string)
{
char *s = xasprintf ("`%s'", type_string);
- lex_error_expecting (lexer, s, NULL_SENTINEL);
+ lex_error_expecting (lexer, s);
free (s);
}
else
- lex_error_expecting (lexer, token_type_to_name (type), NULL_SENTINEL);
+ lex_error_expecting (lexer, token_type_to_name (type));
return false;
}
}
}
+/* If the current token is an integer in the range MIN...MAX (inclusive), does
+   nothing and returns true.  Otherwise, reports an error and returns false.
+   If NAME is nonnull, then it is used in the error message.
+
+   The wording of the error depends on the range:
+
+   - MIN > MAX: the range is empty, so only "integer expected" is reported.
+   - MIN == MAX or MIN + 1 == MAX: the one or two acceptable values are
+     listed.
+   - Otherwise a bound is mentioned only if it lies strictly inside
+     (INT_MIN / 2, INT_MAX / 2) or the current token actually violated it.
+
+   An error is reported on every path that returns false. */
+bool
+lex_force_int_range (struct lexer *lexer, const char *name, long min, long max)
+{
+  bool is_integer = lex_is_integer (lexer);
+  bool too_small = is_integer && lex_integer (lexer) < min;
+  bool too_big = is_integer && lex_integer (lexer) > max;
+  if (is_integer && !too_small && !too_big)
+    return true;
+
+  if (min > max)
+    {
+      /* Weird, maybe a bug in the caller.  Just report that we needed an
+         integer. */
+      if (name)
+        lex_error (lexer, _("Integer expected for %s."), name);
+      else
+        lex_error (lexer, _("Integer expected."));
+    }
+  else if (min == max)
+    {
+      if (name)
+        lex_error (lexer, _("Expected %ld for %s."), min, name);
+      else
+        lex_error (lexer, _("Expected %ld."), min);
+    }
+  else if (min + 1 == max)
+    {
+      /* MIN < MAX here, so MIN + 1 cannot overflow. */
+      if (name)
+        lex_error (lexer, _("Expected %ld or %ld for %s."), min, min + 1, name);
+      else
+        lex_error (lexer, _("Expected %ld or %ld."), min, min + 1);
+    }
+  else
+    {
+      /* Bounds of very large magnitude are treated as "no bound" and are left
+         out of the message unless the token actually crossed them. */
+      bool report_lower_bound = (min > INT_MIN / 2) || too_small;
+      bool report_upper_bound = (max < INT_MAX / 2) || too_big;
+
+      if (report_lower_bound && report_upper_bound)
+        {
+          if (name)
+            lex_error (lexer,
+                       _("Expected integer between %ld and %ld for %s."),
+                       min, max, name);
+          else
+            lex_error (lexer, _("Expected integer between %ld and %ld."),
+                       min, max);
+        }
+      else if (report_lower_bound)
+        {
+          if (min == 0)
+            {
+              if (name)
+                lex_error (lexer, _("Expected non-negative integer for %s."),
+                           name);
+              else
+                lex_error (lexer, _("Expected non-negative integer."));
+            }
+          else if (min == 1)
+            {
+              if (name)
+                lex_error (lexer, _("Expected positive integer for %s."),
+                           name);
+              else
+                lex_error (lexer, _("Expected positive integer."));
+            }
+          else
+            {
+              /* Any other lower bound: without this branch the function
+                 would return false without reporting anything. */
+              if (name)
+                lex_error (lexer,
+                           _("Expected integer greater than or equal to %ld "
+                             "for %s."),
+                           min, name);
+              else
+                lex_error (lexer,
+                           _("Expected integer greater than or equal to %ld."),
+                           min);
+            }
+        }
+      else if (report_upper_bound)
+        {
+          if (name)
+            lex_error (lexer,
+                       _("Expected integer less than or equal to %ld for %s."),
+                       max, name);
+          else
+            lex_error (lexer, _("Expected integer less than or equal to %ld."),
+                       max);
+        }
+      else
+        {
+          if (name)
+            lex_error (lexer, _("Integer expected for %s."), name);
+          else
+            lex_error (lexer, _("Integer expected."));
+        }
+    }
+  return false;
+}
+
/* If the current token is a number, does nothing and returns true.
Otherwise, reports an error and returns false. */
bool
The string is null-terminated (but the null terminator is not included in
the returned substring's 'length').
- Only T_ID and T_STRING tokens have meaningful strings. For other tokens
- this functions this function will always return NULL.
+ Only T_ID, T_MACRO_ID, and T_STRING tokens have meaningful strings. For other
+ tokens this function will always return NULL.
The UTF-8 encoding of the returned string is correct for variable names and
other identifiers. Use filename_to_utf8() to use it as a filename. Use
/* Returns the syntax mode for the syntax file from which the current drawn is
- drawn. Returns LEX_SYNTAX_AUTO for a T_STOP token or if the command's
- source does not have line numbers.
+ drawn. Returns SEG_MODE_AUTO for a T_STOP token or if the command's source
+ does not have line numbers.
There is no version of this function that takes an N argument because
lookahead only works to the end of a command and any given command is always
within a single syntax file. */
-enum lex_syntax_mode
+enum segmenter_mode
lex_get_syntax_mode (const struct lexer *lexer)
{
struct lex_source *src = lex_source__ (lexer);
- return src == NULL ? LEX_SYNTAX_AUTO : src->reader->syntax;
+  /* With no source available, fall back to automatic mode. */
+  if (src == NULL)
+    return SEG_MODE_AUTO;
+
+  /* Otherwise report the mode recorded on the source's reader. */
+  return src->reader->syntax;
}
/* Returns the error mode for the syntax file from which the current drawn is
int mblen;
assert (out_size >= 16);
- out_maxlen = out_size - (in.length >= out_size ? 3 : 0) - 1;
+ out_maxlen = out_size - 1;
+ if (in.length > out_maxlen - 3)
+ out_maxlen -= 3;
+
for (out_len = 0; out_len < in.length; out_len += mblen)
{
if (in.string[out_len] == '\n'
mblen = u8_mblen (CHAR_CAST (const uint8_t *, in.string + out_len),
in.length - out_len);
+
+ if (mblen < 0)
+ break;
+
if (out_len + mblen > out_maxlen)
break;
}
ds_put_cstr (&s, ": ");
ds_put_vformat (&s, format, args);
}
- ds_put_byte (&s, '.');
+ if (ds_last (&s) != '.')
+ ds_put_byte (&s, '.');
struct msg m = {
.category = MSG_C_SYNTAX,
}
/* Attempts to append an additional token into SRC's deque, reading more from
- the underlying lex_reader if necessary.. Returns true if successful, false
+ the underlying lex_reader if necessary. Returns true if successful, false
if the deque already represents (a suffix of) the whole lex_reader's
contents, */
static bool
if (copy_len > 0 && line[copy_len - 1] == '\r')
copy_len--;
- /* Make a copy of the line with \n end-of-line and null terminator. */
- char *syntax = xmalloc (copy_len + 2);
- memcpy (syntax, line, copy_len);
- syntax[copy_len] = '\n';
- syntax[copy_len + 1] = '\0';
-
- text_item_submit (text_item_create_nocopy (TEXT_ITEM_SYNTAX, syntax));
+ /* Submit the line as syntax. */
+ output_item_submit (text_item_create_nocopy (TEXT_ITEM_SYNTAX,
+ xmemdup0 (line, copy_len),
+ NULL));
src->journal_pos += line_len;
}
token->token.string.string);
break;
- case SCAN_UNEXPECTED_DOT:
- lex_get_error (src, _("Unexpected `.' in middle of command"));
- break;
-
case SCAN_UNEXPECTED_CHAR:
{
char c_name[16];
lex_source_create (struct lex_reader *reader)
{
struct lex_source *src;
- enum segmenter_mode mode;
src = xzalloc (sizeof *src);
src->reader = reader;
-
- if (reader->syntax == LEX_SYNTAX_AUTO)
- mode = SEG_MODE_AUTO;
- else if (reader->syntax == LEX_SYNTAX_INTERACTIVE)
- mode = SEG_MODE_INTERACTIVE;
- else if (reader->syntax == LEX_SYNTAX_BATCH)
- mode = SEG_MODE_BATCH;
- else
- NOT_REACHED ();
- segmenter_init (&src->segmenter, mode);
-
+ segmenter_init (&src->segmenter, reader->syntax);
src->tokens = deque_init (&src->deque, 4, sizeof *src->tokens);
lex_source_push_endcmd__ (src);
Returns a null pointer if FILE_NAME cannot be opened. */
struct lex_reader *
lex_reader_for_file (const char *file_name, const char *encoding,
- enum lex_syntax_mode syntax,
+ enum segmenter_mode syntax,
enum lex_error_mode error)
{
struct lex_file_reader *r;
r->reader.syntax = syntax;
r->reader.error = error;
r->reader.file_name = xstrdup (file_name);
- r->reader.encoding = encoding ? xstrdup (encoding) : NULL;
+ r->reader.encoding = xstrdup_if_nonnull (encoding);
r->reader.line_number = 1;
r->istream = istream;
r = xmalloc (sizeof *r);
lex_reader_init (&r->reader, &lex_string_reader_class);
- r->reader.syntax = LEX_SYNTAX_AUTO;
- r->reader.encoding = encoding ? xstrdup (encoding) : NULL;
+ r->reader.syntax = SEG_MODE_AUTO;
+ r->reader.encoding = xstrdup_if_nonnull (encoding);
r->s = s;
r->offset = 0;