/* PSPP - computes sample statistics.
- Copyright (C) 1997-9, 2000 Free Software Foundation, Inc.
- Written by Ben Pfaff <blp@gnu.org>.
+ Copyright (C) 1997-9, 2000, 2006 Free Software Foundation, Inc.
This program is free software; you can redistribute it and/or
modify it under the terms of the GNU General Public License as
#define _(msgid) gettext (msgid)
#define N_(msgid) msgid
-/*
-#define DUMP_TOKENS 1
-*/
+
+#define DUMP_TOKENS 0 /* Set nonzero to compile in the dump_token() prototype and calls guarded by #if DUMP_TOKENS. */
+
struct lexer
{
char *prog; /* Pointer to next token in line_buffer. */
bool dot; /* True only if this line ends with a terminal dot. */
- bool eof; /* True only if the last token returned was T_STOP. */
int put_token ; /* If nonzero, next token returned by lex_get().
Used only in exceptional circumstances. */
static int parse_string (struct lexer *, enum string_type);
#if DUMP_TOKENS
-static void dump_token (void);
+static void dump_token (struct lexer *);
#endif
\f
/* Initialization. */
ds_init_empty (&lexer->line_buffer);
lexer->ss = ss;
- if (!lex_get_line (lexer))
- lexer->eof = true;
-
return lexer;
}
void
lex_get (struct lexer *lexer)
{
+ /* Find a token. */
+ for (;;)
+ {
+ if (NULL == lexer->prog && ! lex_get_line (lexer) )
+ {
+ lexer->token = T_STOP;
+ return;
+ }
+
/* If a token was pushed ahead, return it. */
if (lexer->put_token)
{
restore_token (lexer);
#if DUMP_TOKENS
- dump_token ();
+ dump_token (lexer);
#endif
return;
}
- /* Find a token. */
for (;;)
{
/* Skip whitespace. */
- if (lexer->eof)
- {
- lexer->token = T_STOP;
- return;
- }
-
- for (;;)
- {
while (isspace ((unsigned char) *lexer->prog))
lexer->prog++;
+
if (*lexer->prog)
break;
lexer->dot = 0;
lexer->token = '.';
#if DUMP_TOKENS
- dump_token ();
+ dump_token (lexer);
#endif
return;
}
else if (!lex_get_line (lexer))
{
- lexer->eof = true;
+ lexer->prog = NULL;
lexer->token = T_STOP;
#if DUMP_TOKENS
- dump_token ();
+ dump_token (lexer);
#endif
return;
}
{
restore_token (lexer);
#if DUMP_TOKENS
- dump_token ();
+ dump_token (lexer);
#endif
return;
}
}
#if DUMP_TOKENS
- dump_token ();
+ dump_token (lexer);
#endif
}
return lexer->token == T_POS_NUM || lexer->token == T_NEG_NUM;
}
+
+/* Returns true if the current token is a string, that is, if
+   LEXER's token equals T_STRING; returns false otherwise.
+   Only the current token is examined. */
+bool
+lex_is_string (struct lexer *lexer)
+{
+ return lexer->token == T_STRING;
+}
+
+
/* Returns the value of the current token, which must be a
floating point number. */
double
for (;;)
{
- if (lexer->eof)
+ if (NULL == lexer->prog && ! lex_get_line (lexer) )
return 0;
for (;;)
/* Returns the entire contents of the current line, obtained by
   applying ds_cstr() to LEXER's line_buffer.  LEXER is only read. */
const char *
-lex_entire_line (struct lexer *lexer)
+lex_entire_line (const struct lexer *lexer)
{
return ds_cstr (&lexer->line_buffer);
}
/* As lex_entire_line(), but returns a pointer to LEXER's
   line_buffer itself (a struct string) instead of a char pointer. */
const struct string *
-lex_entire_line_ds (struct lexer *lexer)
+lex_entire_line_ds (const struct lexer *lexer)
{
return &lexer->line_buffer;
}
/* As lex_entire_line(), but only returns the part of the current line
- that hasn't already been tokenized.
- If END_DOT is non-null, stores nonzero into *END_DOT if the line
- ends with a terminal dot, or zero if it doesn't. */
+ that hasn't already been tokenized. */
const char *
-lex_rest_of_line (struct lexer *lexer, int *end_dot)
+lex_rest_of_line (const struct lexer *lexer)
{
- if (end_dot)
- *end_dot = lexer->dot;
/* NOTE(review): prog is reset to NULL elsewhere in this file when no
   further line can be read, so this presumably returns a null pointer
   at end of input -- confirm that callers handle it. */
return lexer->prog;
}
+/* Returns true if the current line ends in a terminal dot,
+ false otherwise.  This reports LEXER's dot flag as it is;
+ LEXER is only read. */
+bool
+lex_end_dot (const struct lexer *lexer)
+{
+ return lexer->dot;
+}
+
/* Causes the rest of the current input line to be ignored for
tokenization purposes. */
void
enum getl_syntax syntax;
if (!lex_get_line_raw (lexer, &syntax))
+ {
+ lexer->prog = NULL;
return false;
+ }
lex_preprocess_line (&lexer->line_buffer, syntax,
&line_starts_command, &lexer->dot);
+
if (line_starts_command)
lexer->put_token = '.';
if (!lex_get_line (lexer))
{
lexer->put_token = T_STOP;
- lexer->eof = true;
+ lexer->prog = NULL;
return;
}
if (ds_length (&lexer->tokstr) % chars_per_byte)
msg (SE, _("String of %s digits has %d characters, which is not a "
"multiple of %d."),
- base_name, ds_length (&lexer->tokstr), chars_per_byte);
+ base_name, (int) ds_length (&lexer->tokstr), chars_per_byte);
p = ds_cstr (&lexer->tokstr);
for (i = 0; i < byte_cnt; i++)
lexer->prog++;
/* Skip whitespace after final quote mark. */
- if (lexer->eof)
+ if (lexer->prog == NULL)
break;
for (;;)
{
lexer->prog++;
/* Skip whitespace after plus sign. */
- if (lexer->eof)
+ if (lexer->prog == NULL)
break;
for (;;)
{
if (ds_length (&lexer->tokstr) > 255)
{
msg (SE, _("String exceeds 255 characters in length (%d characters)."),
- ds_length (&lexer->tokstr));
+ (int) ds_length (&lexer->tokstr));
ds_truncate (&lexer->tokstr, 255);
}
const char *curfn;
int curln;
- getl_location (&curfn, &curln);
+ curln = getl_source_location (lexer->ss);
+ curfn = getl_source_name (lexer->ss);
if (curfn)
fprintf (stderr, "%s:%d\t", curfn, curln);
}
break;
default:
- if (lex_is_keyword (token))
- fprintf (stderr, "KEYWORD\t%s\n", lex_token_name (token));
+ if (lex_is_keyword (lexer->token))
+ fprintf (stderr, "KEYWORD\t%s\n", lex_token_name (lexer->token));
else
fprintf (stderr, "PUNCT\t%c\n", lexer->token);
break;