/* PSPP - a program for statistical analysis.
- Copyright (C) 1997-9, 2000, 2006, 2009, 2010, 2011 Free Software Foundation, Inc.
+ Copyright (C) 1997-9, 2000, 2006, 2009, 2010, 2011, 2013 Free Software Foundation, Inc.
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
{
reader->class = class;
reader->syntax = LEX_SYNTAX_AUTO;
- reader->error = LEX_ERROR_INTERACTIVE;
+ reader->error = LEX_ERROR_CONTINUE;
reader->file_name = NULL;
+ reader->encoding = NULL;
reader->line_number = 0;
}
va_end (args);
}
-/* Reports an error to the effect that subcommand SBC may only be
- specified once. */
+/* Prints a syntax error message saying that OPTION0 or one of the other
+   strings following it, up to the first NULL, is expected.
+
+   The variable argument list must be terminated by a null pointer (callers
+   in this file pass NULL_SENTINEL).  At most MAX_OPTIONS alternatives are
+   reported; any beyond that are ignored.  If OPTION0 itself is NULL, no
+   alternatives are listed and lex_error() is called with a null format. */
+void
+lex_error_expecting (struct lexer *lexer, const char *option0, ...)
+{
+  enum { MAX_OPTIONS = 8 };
+  const char *options[MAX_OPTIONS + 1];
+  va_list args;
+  int n;
+
+  /* Gather alternatives until the NULL terminator is seen or MAX_OPTIONS of
+     them have been counted.  The bound must be N < MAX_OPTIONS (not
+     N + 1 < MAX_OPTIONS) so that the last permitted alternative is counted;
+     the extra array element receives the terminator, or the first surplus
+     argument, which is never printed. */
+  va_start (args, option0);
+  options[0] = option0;
+  n = 0;
+  while (n < MAX_OPTIONS && options[n] != NULL)
+    options[++n] = va_arg (args, const char *);
+  va_end (args);
+
+  /* Each count has its own complete format string so that translators see
+     whole sentences rather than fragments glued together at run time. */
+  switch (n)
+    {
+    case 0:
+      lex_error (lexer, NULL);
+      break;
+
+    case 1:
+      lex_error (lexer, _("expecting %s"), options[0]);
+      break;
+
+    case 2:
+      lex_error (lexer, _("expecting %s or %s"), options[0], options[1]);
+      break;
+
+    case 3:
+      lex_error (lexer, _("expecting %s, %s, or %s"), options[0], options[1],
+                 options[2]);
+      break;
+
+    case 4:
+      lex_error (lexer, _("expecting %s, %s, %s, or %s"),
+                 options[0], options[1], options[2], options[3]);
+      break;
+
+    case 5:
+      lex_error (lexer, _("expecting %s, %s, %s, %s, or %s"),
+                 options[0], options[1], options[2], options[3], options[4]);
+      break;
+
+    case 6:
+      lex_error (lexer, _("expecting %s, %s, %s, %s, %s, or %s"),
+                 options[0], options[1], options[2], options[3], options[4],
+                 options[5]);
+      break;
+
+    case 7:
+      lex_error (lexer, _("expecting %s, %s, %s, %s, %s, %s, or %s"),
+                 options[0], options[1], options[2], options[3], options[4],
+                 options[5], options[6]);
+      break;
+
+    case 8:
+      lex_error (lexer, _("expecting %s, %s, %s, %s, %s, %s, %s, or %s"),
+                 options[0], options[1], options[2], options[3], options[4],
+                 options[5], options[6], options[7]);
+      break;
+
+    default:
+      NOT_REACHED ();
+    }
+}
+
+/* Reports an error to the effect that subcommand SBC may only be specified
+   once.  SBC is the subcommand name; it is substituted into the message.
+
+   This function does not take a lexer as an argument or use lex_error(),
+   because the result would ordinarily just be redundant: "Syntax error at
+   SUBCOMMAND: Subcommand SUBCOMMAND may only be specified once.", which does
+   not help the user find the error.  The message is issued through msg()
+   with class SE instead. */
void
lex_sbc_only_once (const char *sbc)
{
msg (SE, _("Subcommand %s may only be specified once."), sbc);
}
-/* Reports an error to the effect that subcommand SBC is
- missing. */
+/* Reports, through msg() with class SE, that the required subcommand SBC was
+   not specified.
+
+   No lexer argument is taken and lex_error() is deliberately not used: a
+   missing subcommand can normally be detected only after the whole command
+   has been parsed, so lex_error() would always report "Syntax error at end
+   of command", which gives the user no help in locating the problem. */
+void
+lex_sbc_missing (const char *sbc)
+{
+  msg (SE,
+       _("Required subcommand %s was not specified."),
+       sbc);
+}
+
+/* Reports an error to the effect that specification SPEC may only be specified
+   once within subcommand SBC.
+
+   The error is issued through lex_error() on LEXER.  The message names SPEC
+   first and SBC second, the reverse of the order in which they are passed
+   as parameters. */
void
-lex_sbc_missing (struct lexer *lexer, const char *sbc)
+lex_spec_only_once (struct lexer *lexer, const char *sbc, const char *spec)
{
- lex_error (lexer, _("missing required subcommand %s"), sbc);
+ lex_error (lexer, _("%s may only be specified once within subcommand %s"),
+ spec, sbc);
+}
+
+/* Reports an error to the effect that specification SPEC is missing within
+   subcommand SBC.
+
+   The error is issued through lex_error() on LEXER. */
+void
+lex_spec_missing (struct lexer *lexer, const char *sbc, const char *spec)
+{
+  /* The first %s names the specification and the second names the
+     subcommand, so SPEC must be passed before SBC even though the parameters
+     are declared in the opposite order. */
+  lex_error (lexer, _("Required %s specification missing from %s subcommand"),
+             spec, sbc);
}
/* Prints a syntax error message containing the current token and
return true;
else
{
- lex_error (lexer, _("expecting `%s'"), identifier);
+ lex_error_expecting (lexer, identifier, NULL_SENTINEL);
return false;
}
}
}
else
{
- lex_error (lexer, _("expecting `%s'"), token_type_to_string (type));
+ char *s = xasprintf ("`%s'", token_type_to_string (type));
+ lex_error_expecting (lexer, s, NULL_SENTINEL);
+ free (s);
return false;
}
}
}
}
+/* If the current token is a string or an identifier, does nothing and returns
+   true.  Otherwise, reports an error and returns false.
+
+   This is meant for use in syntactic situations where we want to encourage the
+   user to supply a quoted string, but for compatibility we also accept
+   identifiers.  (One example of such a situation is file names.)  Therefore,
+   the error message issued when the current token is wrong only says that a
+   string is expected and doesn't mention that an identifier would also be
+   accepted. */
+bool
+lex_force_string_or_id (struct lexer *lexer)
+{
+  /* An identifier is accepted silently.  Anything else, including a number,
+     is handed to lex_force_string(), which accepts a string and otherwise
+     reports the error. */
+  return lex_next (lexer, 0)->type == T_ID || lex_force_string (lexer);
+}
+
/* If the current token is an integer, does nothing and returns true.
Otherwise, reports an error and returns false. */
bool
return lex_next (lexer, n)->string;
}
-/* If LEXER is positioned at the (pseudo)identifier S, skips it and returns
- true. Otherwise, returns false.
-
- S may consist of an arbitrary number of identifiers, integers, and
- punctuation e.g. "KRUSKAL-WALLIS", "2SLS", or "END INPUT PROGRAM".
- Identifiers may be abbreviated to their first three letters. Currently only
- hyphens, slashes, and equals signs are supported as punctuation (but it
- would be easy to add more).
-
- S must be an ASCII string. */
-bool
-lex_match_phrase (struct lexer *lexer, const char *s)
+/* Returns true if token ACTUAL matches token EXPECTED, false otherwise.
+
+   The two tokens must have the same type.  In addition, numbers must have
+   equal values, identifiers must match according to lex_id_match() (with
+   EXPECTED's string passed first and ACTUAL's second), and strings must have
+   the same length and byte contents.  Tokens of any other type match on type
+   alone. */
+static bool
+lex_tokens_match (const struct token *actual, const struct token *expected)
{
- int tok_idx;
+ if (actual->type != expected->type)
+ return false;
- for (tok_idx = 0; ; tok_idx++)
+ switch (actual->type)
{
- enum token_type token;
- unsigned char c;
-
- while (c_isspace (*s))
- s++;
-
- c = *s;
- if (c == '\0')
- {
- int i;
-
- for (i = 0; i < tok_idx; i++)
- lex_get (lexer);
- return true;
- }
-
- token = lex_next_token (lexer, tok_idx);
- switch (c)
- {
- case '-':
- if (token != T_DASH)
- return false;
- s++;
- break;
+ case T_POS_NUM:
+ case T_NEG_NUM:
+ return actual->number == expected->number;
- case '/':
- if (token != T_SLASH)
- return false;
- s++;
- break;
+ case T_ID:
+ return lex_id_match (expected->string, actual->string);
- case '=':
- if (token != T_EQUALS)
- return false;
- s++;
- break;
+ case T_STRING:
+ return (actual->string.length == expected->string.length
+ && !memcmp (actual->string.string, expected->string.string,
+ actual->string.length));
- case '0': case '1': case '2': case '3': case '4':
- case '5': case '6': case '7': case '8': case '9':
- {
- unsigned int value;
-
- if (token != T_POS_NUM)
- return false;
-
- value = 0;
- do
- {
- value = value * 10 + (*s++ - '0');
- }
- while (c_isdigit (*s));
-
- if (lex_next_tokval (lexer, tok_idx) != value)
- return false;
- }
- break;
+ default:
+ return true;
+ }
+}
- default:
- if (lex_is_id1 (c))
- {
- int len;
+/* If LEXER is positioned at the sequence of tokens that may be parsed from S,
+ skips it and returns true. Otherwise, returns false.
- if (token != T_ID)
- return false;
+ S may consist of an arbitrary sequence of tokens, e.g. "KRUSKAL-WALLIS",
+ "2SLS", or "END INPUT PROGRAM". Identifiers may be abbreviated to their
+ first three letters.
+
+ S is tokenized with a string_lexer in SEG_MODE_INTERACTIVE, and tokens of
+ type SCAN_SKIP are ignored. Each remaining token is compared, using
+ lex_tokens_match(), against LEXER's tokens starting from the current one.
+ Nothing is consumed unless every token matches; on success, each matched
+ token is skipped with lex_get(). */
+bool
+lex_match_phrase (struct lexer *lexer, const char *s)
+{
+ struct string_lexer slex;
+ struct token token;
+ int i;
- len = lex_id_get_length (ss_cstr (s));
- if (!lex_id_match (ss_buffer (s, len),
- lex_next_tokss (lexer, tok_idx)))
- return false;
+ i = 0;
+ string_lexer_init (&slex, s, SEG_MODE_INTERACTIVE);
+ while (string_lexer_next (&slex, &token))
+ if (token.type != SCAN_SKIP)
+ {
+ bool match = lex_tokens_match (lex_next (lexer, i++), &token);
+ token_destroy (&token);
+ if (!match)
+ return false;
+ }
- s += len;
- }
- else
- NOT_REACHED ();
- }
- }
+ while (i-- > 0)
+ lex_get (lexer);
+ return true;
}
static int
return src == NULL ? NULL : src->reader->file_name;
}
+/* Returns the encoding recorded in the reader of the source that LEXER is
+   currently reading, or NULL if LEXER has no such source.  The result is
+   also NULL if the reader has no encoding set. */
+const char *
+lex_get_encoding (const struct lexer *lexer)
+{
+  struct lex_source *source = lex_source__ (lexer);
+
+  if (source == NULL)
+    return NULL;
+  return source->reader->encoding;
+}
+
+
/* Returns the syntax mode for the syntax file from which the current drawn is
drawn. Returns LEX_SYNTAX_AUTO for a T_STOP token or if the command's
source does not have line numbers.
}
/* Returns the error mode for the syntax file from which the current drawn is
- drawn. Returns LEX_ERROR_INTERACTIVE for a T_STOP token or if the command's
+ drawn. Returns LEX_ERROR_TERMINAL for a T_STOP token or if the command's
source does not have line numbers.
There is no version of this function that takes an N argument because
lex_get_error_mode (const struct lexer *lexer)
{
struct lex_source *src = lex_source__ (lexer);
- return src == NULL ? LEX_ERROR_INTERACTIVE : src->reader->error;
+ return src == NULL ? LEX_ERROR_TERMINAL : src->reader->error;
}
/* If the source that LEXER is currently reading has error mode
- LEX_ERROR_INTERACTIVE, discards all buffered input and tokens, so that the
- next token to be read comes directly from whatever is next read from the
- stream.
+ LEX_ERROR_TERMINAL, discards all buffered input and tokens, so that the next
+ token to be read comes directly from whatever is next read from the stream.
It makes sense to call this function after encountering an error in a
command entered on the console, because usually the user would prefer not to
lex_interactive_reset (struct lexer *lexer)
{
struct lex_source *src = lex_source__ (lexer);
- if (src != NULL && src->reader->error == LEX_ERROR_INTERACTIVE)
+ if (src != NULL && src->reader->error == LEX_ERROR_TERMINAL)
{
src->head = src->tail = 0;
src->journal_pos = src->seg_pos = src->line_pos = 0;
}
/* Discards all lookahead tokens in LEXER, then discards all input sources
- until it encounters one with error mode LEX_ERROR_INTERACTIVE or until it
+ until it encounters one with error mode LEX_ERROR_TERMINAL or until it
runs out of input sources. */
void
lex_discard_noninteractive (struct lexer *lexer)
while (!deque_is_empty (&src->deque))
lex_source_pop__ (src);
- for (; src != NULL && src->reader->error != LEX_ERROR_INTERACTIVE;
+ for (; src != NULL && src->reader->error != LEX_ERROR_TERMINAL;
src = lex_source__ (lexer))
lex_source_destroy (src);
}
do
{
size_t head_ofs;
+ size_t space;
size_t n;
lex_source_expand__ (src);
head_ofs = src->head - src->tail;
+ space = src->allocated - head_ofs;
n = src->reader->class->read (src->reader, &src->buffer[head_ofs],
- src->allocated - head_ofs,
+ space,
segmenter_get_prompt (&src->segmenter));
+ assert (n <= space);
+
if (n == 0)
{
/* End of input.
lex_source_destroy (struct lex_source *src)
{
char *file_name = src->reader->file_name;
+ char *encoding = src->reader->encoding;
if (src->reader->class->destroy != NULL)
src->reader->class->destroy (src->reader);
free (file_name);
+ free (encoding);
free (src->buffer);
while (!deque_is_empty (&src->deque))
lex_source_pop__ (src);
{
struct lex_reader reader;
struct u8_istream *istream;
- char *file_name;
};
static struct lex_reader_class lex_file_reader_class;
r->reader.syntax = syntax;
r->reader.error = error;
r->reader.file_name = xstrdup (file_name);
+ r->reader.encoding = encoding ? xstrdup (encoding) : NULL;
r->reader.line_number = 1;
r->istream = istream;
- r->file_name = xstrdup (file_name);
return &r->reader;
}
ssize_t n_read = u8_istream_read (r->istream, buf, n);
if (n_read < 0)
{
- msg (ME, _("Error reading `%s': %s."), r->file_name, strerror (errno));
+ msg (ME, _("Error reading `%s': %s."), r_->file_name, strerror (errno));
return 0;
}
return n_read;
if (u8_istream_fileno (r->istream) != STDIN_FILENO)
{
if (u8_istream_close (r->istream) != 0)
- msg (ME, _("Error closing `%s': %s."), r->file_name, strerror (errno));
+ msg (ME, _("Error closing `%s': %s."), r_->file_name, strerror (errno));
}
else
u8_istream_free (r->istream);
- free (r->file_name);
free (r);
}
static struct lex_reader_class lex_string_reader_class;
/* Creates and returns a new lex_reader for the contents of S, which must be
- encoded in UTF-8. The new reader takes ownership of S and will free it
+ encoded in the given ENCODING. The new reader takes ownership of S and will free it
with ss_dealloc() when it is closed. */
struct lex_reader *
-lex_reader_for_substring_nocopy (struct substring s)
+lex_reader_for_substring_nocopy (struct substring s, const char *encoding)
{
struct lex_string_reader *r;
r = xmalloc (sizeof *r);
lex_reader_init (&r->reader, &lex_string_reader_class);
- r->reader.syntax = LEX_SYNTAX_INTERACTIVE;
+ r->reader.syntax = LEX_SYNTAX_AUTO;
+ r->reader.encoding = encoding ? xstrdup (encoding) : NULL;
r->s = s;
r->offset = 0;
}
/* Creates and returns a new lex_reader for a copy of null-terminated string S,
- which must be encoded in UTF-8. The caller retains ownership of S. */
+ which must be encoded in ENCODING. The caller retains ownership of S.
+ ENCODING is forwarded to lex_reader_for_substring_nocopy(), which stores
+ its own copy of a non-null ENCODING, so the caller retains ownership of
+ ENCODING as well. */
struct lex_reader *
-lex_reader_for_string (const char *s)
+lex_reader_for_string (const char *s, const char *encoding)
{
struct substring ss;
ss_alloc_substring (&ss, ss_cstr (s));
- return lex_reader_for_substring_nocopy (ss);
+ return lex_reader_for_substring_nocopy (ss, encoding);
}
/* Formats FORMAT as a printf()-like format string and creates and returns a
new lex_reader for the formatted result. */
struct lex_reader *
-lex_reader_for_format (const char *format, ...)
+lex_reader_for_format (const char *format, const char *encoding, ...)
{
struct lex_reader *r;
va_list args;
- va_start (args, format);
- r = lex_reader_for_substring_nocopy (ss_cstr (xvasprintf (format, args)));
+ va_start (args, encoding);
+ r = lex_reader_for_substring_nocopy (ss_cstr (xvasprintf (format, args)), encoding);
va_end (args);
return r;