/* PSPP - computes sample statistics.
- Copyright (C) 1997-9, 2000 Free Software Foundation, Inc.
- Written by Ben Pfaff <blp@gnu.org>.
+ Copyright (C) 1997-9, 2000, 2006 Free Software Foundation, Inc.
This program is free software; you can redistribute it and/or
modify it under the terms of the GNU General Public License as
#include <libpspp/assertion.h>
#include <language/command.h>
#include <libpspp/message.h>
-#include <language/line-buffer.h>
#include <libpspp/magic.h>
#include <data/settings.h>
+#include <libpspp/getl.h>
#include <libpspp/str.h>
#include "size_max.h"
#define _(msgid) gettext (msgid)
#define N_(msgid) msgid
-/*
-#define DUMP_TOKENS 1
-*/
+
+#define DUMP_TOKENS 0
+
struct lexer
{
struct string line_buffer;
- bool (*read_line) (struct string *, bool *);
+ struct source_stream *ss;
int token; /* Current token. */
double tokval; /* T_POS_NUM, T_NEG_NUM: the token's value. */
char *prog; /* Pointer to next token in line_buffer. */
bool dot; /* True only if this line ends with a terminal dot. */
- bool eof; /* True only if the last token returned was T_STOP. */
int put_token ; /* If nonzero, next token returned by lex_get().
Used only in exceptional circumstances. */
static int parse_string (struct lexer *, enum string_type);
#if DUMP_TOKENS
-static void dump_token (void);
+static void dump_token (struct lexer *);
#endif
\f
/* Initialization. */
/* Initializes the lexer. */
struct lexer *
-lex_create (bool (*read_line_func) (struct string *, bool *))
+lex_create (struct source_stream *ss)
{
struct lexer *lexer = xzalloc (sizeof (*lexer));
ds_init_empty (&lexer->tokstr);
ds_init_empty (&lexer->put_tokstr);
ds_init_empty (&lexer->line_buffer);
- lexer->read_line = read_line_func;
-
- if (!lex_get_line (lexer))
- lexer->eof = true;
+ lexer->ss = ss; /* Only remember SS here; no input line is read at creation time. */
return lexer;
}
+struct source_stream *
+lex_get_source_stream (const struct lexer *lex)
+{
+ return lex->ss; /* The stream stored in LEX's `ss' member; LEX is not modified. */
+}
+
+
void
lex_destroy (struct lexer *lexer)
{
void
lex_get (struct lexer *lexer)
{
+ /* Find a token. */
+ for (;;)
+ {
+ if (NULL == lexer->prog && ! lex_get_line (lexer) )
+ {
+ lexer->token = T_STOP;
+ return;
+ }
+
/* If a token was pushed ahead, return it. */
if (lexer->put_token)
{
restore_token (lexer);
#if DUMP_TOKENS
- dump_token ();
+ dump_token (lexer);
#endif
return;
}
- /* Find a token. */
for (;;)
{
/* Skip whitespace. */
- if (lexer->eof)
- {
- lexer->token = T_STOP;
- return;
- }
-
- for (;;)
- {
while (isspace ((unsigned char) *lexer->prog))
lexer->prog++;
+
if (*lexer->prog)
break;
lexer->dot = 0;
lexer->token = '.';
#if DUMP_TOKENS
- dump_token ();
+ dump_token (lexer);
#endif
return;
}
else if (!lex_get_line (lexer))
{
- lexer->eof = true;
+ lexer->prog = NULL;
lexer->token = T_STOP;
#if DUMP_TOKENS
- dump_token ();
+ dump_token (lexer);
#endif
return;
}
{
restore_token (lexer);
#if DUMP_TOKENS
- dump_token ();
+ dump_token (lexer);
#endif
return;
}
}
#if DUMP_TOKENS
- dump_token ();
+ dump_token (lexer);
#endif
}
static int
parse_id (struct lexer *lexer)
{
- const char *start = lexer->prog;
- lexer->prog = lex_skip_identifier (start);
-
- ds_put_substring (&lexer->tokstr, ss_buffer (start, lexer->prog - start));
+ struct substring rest_of_line
+ = ss_substr (ds_ss (&lexer->line_buffer),
+ ds_pointer_to_position (&lexer->line_buffer, lexer->prog),
+ SIZE_MAX); /* Presumably the text from PROG through the end of line_buffer
+              -- confirm against ss_substr(). */
+ struct substring id = ss_head (rest_of_line,
+ lex_id_get_length (rest_of_line)); /* Presumably the leading identifier
+                                        characters of REST_OF_LINE -- confirm. */
+ lexer->prog += ss_length (id); /* Advance PROG by the length of ID. */
+
+ ds_assign_substring (&lexer->tokstr, id); /* The token string is set from ID. */
str_copy_trunc (lexer->tokid, sizeof lexer->tokid, ds_cstr (&lexer->tokstr));
- return lex_id_to_token (ds_cstr (&lexer->tokstr), ds_length (&lexer->tokstr));
+ return lex_id_to_token (id); /* The token code is looked up from the text of ID. */
}
/* Reports an error to the effect that subcommand SBC may only be
return lexer->token == T_POS_NUM || lexer->token == T_NEG_NUM;
}
+
+/* Returns true if the current token is a string, that is, if
+   LEXER's token member equals T_STRING; returns false otherwise.
+   LEXER is only read, never modified. */
+bool
+lex_is_string (struct lexer *lexer)
+{
+ return lexer->token == T_STRING;
+}
+
+
/* Returns the value of the current token, which must be a
floating point number. */
double
bool
lex_match_id (struct lexer *lexer, const char *s)
{
- if (lexer->token == T_ID && lex_id_match (s, lexer->tokid))
+ if (lexer->token == T_ID
+ && lex_id_match (ss_cstr (s), ss_cstr (lexer->tokid)))
{
lex_get (lexer);
return true;
bool
lex_force_match_id (struct lexer *lexer, const char *s)
{
- if (lexer->token == T_ID && lex_id_match (s, lexer->tokid))
- {
- lex_get (lexer);
- return true;
- }
+ if (lex_match_id (lexer, s))
+ return true;
else
{
lex_error (lexer, _("expecting `%s'"), s);
for (;;)
{
- if (lexer->eof)
+ if (NULL == lexer->prog && ! lex_get_line (lexer) )
return 0;
for (;;)
void
lex_put_back_id (struct lexer *lexer, const char *id)
{
- assert (lex_id_to_token (id, strlen (id)) == T_ID);
+ assert (lex_id_to_token (ss_cstr (id)) == T_ID);
save_token (lexer);
lexer->token = T_ID;
ds_assign_cstr (&lexer->tokstr, id);
void
lex_discard_rest_of_command (struct lexer *lexer)
{
- if (!getl_is_interactive ())
+ if (!getl_is_interactive (lexer->ss)) /* Skip ahead only when the lexer's source stream is not interactive. */
{
while (lexer->token != T_STOP && lexer->token != '.')
lex_get (lexer);
}
}
-/* Reads a line, without performing any preprocessing */
+/* Prepares LINE, which is subject to the given SYNTAX rules, for
+ tokenization.  LINE is modified in place: comments are stripped
+ and trailing CC_SPACES characters are trimmed.
+
+ *LINE_ENDS_COMMAND is set to true if ds_chomp() succeeds with the
+ get_endcmd() character, or if the line is then empty and
+ get_nulline() is true; otherwise it is set to false.
+
+ *LINE_STARTS_COMMAND is always set to false unless SYNTAX is
+ GETL_BATCH.  In batch syntax it is set to true when the first
+ character of LINE is not white space, and a leading `+' or `-'
+ is overwritten with a space. */
+void
+lex_preprocess_line (struct string *line,
+ enum getl_syntax syntax,
+ bool *line_starts_command,
+ bool *line_ends_command)
+{
+ strip_comments (line);
+ ds_rtrim (line, ss_cstr (CC_SPACES));
+ *line_ends_command = (ds_chomp (line, get_endcmd ())
+ || (ds_is_empty (line) && get_nulline ()));
+ *line_starts_command = false; /* Default for every syntax mode other than GETL_BATCH. */
+ if (syntax == GETL_BATCH)
+ {
+ int first = ds_first (line);
+ *line_starts_command = !isspace (first); /* NOTE(review): if ds_first() yields EOF for an empty
+                                              line, isspace() is false and the empty line is
+                                              reported as starting a command -- confirm intended. */
+ if (first == '+' || first == '-')
+ *ds_data (line) = ' '; /* Blank out the leading `+' or `-'. */
+ }
+}
+
+/* Reads a line from LEXER's source stream into LEXER's line
+ buffer, without performing any preprocessing.
+ Sets *SYNTAX, if SYNTAX is non-null, to the line's syntax
+ mode.
+ Returns the result of getl_read_line(); callers in this file
+ treat false as "no line could be read". */
bool
-lex_get_line_raw (struct lexer *lexer)
+lex_get_line_raw (struct lexer *lexer, enum getl_syntax *syntax)
{
- bool dummy;
- return lexer->read_line (&lexer->line_buffer, &dummy);
+ enum getl_syntax dummy; /* Receives the syntax mode when the caller passes a null SYNTAX. */
+ bool ok = getl_read_line (lexer->ss, &lexer->line_buffer,
+ syntax != NULL ? syntax : &dummy);
+ return ok;
}
/* Reads a line for use by the tokenizer, and preprocesses it by
bool
lex_get_line (struct lexer *lexer)
{
- struct string *line = &lexer->line_buffer;
- bool interactive;
-
- if (!lexer->read_line (line, &interactive))
- return false;
+ bool line_starts_command;
+ enum getl_syntax syntax;
- strip_comments (line);
- ds_rtrim (line, ss_cstr (CC_SPACES));
-
- /* Check for and remove terminal dot. */
- lexer->dot = (ds_chomp (line, get_endcmd ())
- || (ds_is_empty (line) && get_nulline ()));
-
- /* Strip leading indentors or insert a terminal dot (unless the
- line was obtained interactively). */
- if (!interactive)
+ if (!lex_get_line_raw (lexer, &syntax))
{
- int first = ds_first (line);
-
- if (first == '+' || first == '-')
- *ds_data (line) = ' ';
- else if (first != EOF && !isspace (first))
- lexer->put_token = '.';
+ lexer->prog = NULL; /* A null PROG marks "no current line"; lex_get() tests for it. */
+ return false;
}
- lexer->prog = ds_cstr (line);
+ lex_preprocess_line (&lexer->line_buffer, syntax,
+ &line_starts_command, &lexer->dot); /* "Line ends command" is recorded in lexer->dot. */
+
+ if (line_starts_command)
+ lexer->put_token = '.'; /* Set the pushed-ahead token to '.' when this line starts a command. */
+ lexer->prog = ds_cstr (&lexer->line_buffer); /* Tokenizing resumes at the start of the new line. */
return true;
}
\f
/* Token names. */
-/* Returns the name of a token in a static buffer. */
+/* Returns the name of TOKEN.
+   If lex_is_keyword (TOKEN) is true, the result is
+   lex_id_name (TOKEN).  Otherwise, if TOKEN is less than 256, the
+   result is a one-character string holding TOKEN, kept in static
+   storage with a separate slot for each token value; the caller
+   must not free it.  Any other TOKEN is NOT_REACHED.
+   NOTE(review): TOKEN indexes a 256-entry table directly, so it
+   is presumably never negative here -- confirm. */
const char *
lex_token_name (int token)
{
- if (token >= T_FIRST_KEYWORD && token <= T_LAST_KEYWORD)
- return keywords[token - T_FIRST_KEYWORD];
-
- if (token < 256)
+ if (lex_is_keyword (token))
+ return lex_id_name (token);
+ else if (token < 256)
{
- static char t[2];
- t[0] = token;
- return t;
+ static char t[256][2]; /* One 2-byte string per token value, so a result is not
+                           overwritten by a later call for a different token. */
+ char *s = t[token];
+ s[0] = token;
+ s[1] = '\0';
+ return s;
}
-
- NOT_REACHED ();
+ else
+ NOT_REACHED ();
}
/* Returns an ASCII representation of the current token as a
return xstrdup ("**");
default:
- if (lexer->token >= T_FIRST_KEYWORD && lexer->token <= T_LAST_KEYWORD)
- return xstrdup (keywords [lexer->token - T_FIRST_KEYWORD]);
- else
- {
- token_rep = xmalloc (2);
- token_rep[0] = lexer->token;
- token_rep[1] = '\0';
- return token_rep;
- }
+ return xstrdup (lex_token_name (lexer->token));
}
NOT_REACHED ();
}
}
-/* We're not at eof any more. */
-void
-lex_reset_eof (struct lexer *lexer)
-{
- lexer->eof = false;
-}
-
/* Skip a COMMENT command. */
void
lex_skip_comment (struct lexer *lexer)
if (!lex_get_line (lexer))
{
lexer->put_token = T_STOP;
- lexer->eof = true;
+ lexer->prog = NULL;
return;
}
if (ds_length (&lexer->tokstr) % chars_per_byte)
msg (SE, _("String of %s digits has %d characters, which is not a "
"multiple of %d."),
- base_name, ds_length (&lexer->tokstr), chars_per_byte);
+ base_name, (int) ds_length (&lexer->tokstr), chars_per_byte);
p = ds_cstr (&lexer->tokstr);
for (i = 0; i < byte_cnt; i++)
lexer->prog++;
/* Skip whitespace after final quote mark. */
- if (lexer->eof)
+ if (lexer->prog == NULL)
break;
for (;;)
{
lexer->prog++;
/* Skip whitespace after plus sign. */
- if (lexer->eof)
+ if (lexer->prog == NULL)
break;
for (;;)
{
if (ds_length (&lexer->tokstr) > 255)
{
msg (SE, _("String exceeds 255 characters in length (%d characters)."),
- ds_length (&lexer->tokstr));
+ (int) ds_length (&lexer->tokstr));
ds_truncate (&lexer->tokstr, 255);
}
const char *curfn;
int curln;
- getl_location (&curfn, &curln);
+ curln = getl_source_location (lexer->ss);
+ curfn = getl_source_name (lexer->ss);
if (curfn)
fprintf (stderr, "%s:%d\t", curfn, curln);
}
break;
default:
- if (lexer->token >= T_FIRST_KEYWORD && lexer->token <= T_LAST_KEYWORD)
- fprintf (stderr, "KEYWORD\t%s\n", lex_token_name (token));
+ if (lex_is_keyword (lexer->token))
+ fprintf (stderr, "KEYWORD\t%s\n", lex_token_name (lexer->token));
else
fprintf (stderr, "PUNCT\t%c\n", lexer->token);
break;