X-Git-Url: https://pintos-os.org/cgi-bin/gitweb.cgi?a=blobdiff_plain;f=src%2Flanguage%2Flexer%2Flexer.c;h=68dd1c8638f190aea05bedd096912f3b07e34c11;hb=9105b67fe006fe41c044e3659325594a52d0c899;hp=9c6063fd066b04f018e34e4a7f545058f56df3ef;hpb=2d4dd90964061defa92972156ae2a12323708519;p=pspp diff --git a/src/language/lexer/lexer.c b/src/language/lexer/lexer.c index 9c6063fd06..68dd1c8638 100644 --- a/src/language/lexer/lexer.c +++ b/src/language/lexer/lexer.c @@ -1,6 +1,5 @@ /* PSPP - computes sample statistics. - Copyright (C) 1997-9, 2000 Free Software Foundation, Inc. - Written by Ben Pfaff . + Copyright (C) 1997-9, 2000, 2006 Free Software Foundation, Inc. This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as @@ -41,15 +40,15 @@ #define _(msgid) gettext (msgid) #define N_(msgid) msgid -/* -#define DUMP_TOKENS 1 -*/ + +#define DUMP_TOKENS 0 + struct lexer { struct string line_buffer; - bool (*read_line) (struct string *, bool *); + struct source_stream *ss; int token; /* Current token. */ double tokval; /* T_POS_NUM, T_NEG_NUM: the token's value. */ @@ -62,7 +61,6 @@ struct lexer char *prog; /* Pointer to next token in line_buffer. */ bool dot; /* True only if this line ends with a terminal dot. */ - bool eof; /* True only if the last token returned was T_STOP. */ int put_token ; /* If nonzero, next token returned by lex_get(). Used only in exceptional circumstances. */ @@ -86,28 +84,32 @@ enum string_type static int parse_string (struct lexer *, enum string_type); #if DUMP_TOKENS -static void dump_token (void); +static void dump_token (struct lexer *); #endif /* Initialization. */ /* Initializes the lexer. */ struct lexer * -lex_create (bool (*read_line_func) (struct string *, bool *)) +lex_create (struct source_stream *ss) { struct lexer *lexer = xzalloc (sizeof (*lexer)); ds_init_empty (&lexer->tokstr); ds_init_empty (&lexer->put_tokstr); ds_init_empty (&lexer->line_buffer); - lexer->read_line = read_line_func; - - if (!lex_get_line (lexer)) - lexer->eof = true; + lexer->ss = ss; return lexer; } +struct source_stream * +lex_get_source_stream (const struct lexer *lex) +{ + return lex->ss; +} + + void lex_destroy (struct lexer *lexer) { @@ -152,30 +154,31 @@ save_token (struct lexer *lexer) void lex_get (struct lexer *lexer) { + /* Find a token. */ + for (;;) + { + if (NULL == lexer->prog && ! lex_get_line (lexer) ) + { + lexer->token = T_STOP; + return; + } + /* If a token was pushed ahead, return it. */ if (lexer->put_token) { restore_token (lexer); #if DUMP_TOKENS - dump_token (); + dump_token (lexer); #endif return; } - /* Find a token. */ for (;;) { /* Skip whitespace. */ - if (lexer->eof) - { - lexer->token = T_STOP; - return; - } - - for (;;) - { while (isspace ((unsigned char) *lexer->prog)) lexer->prog++; + if (*lexer->prog) break; @@ -184,16 +187,16 @@ lex_get (struct lexer *lexer) lexer->dot = 0; lexer->token = '.'; #if DUMP_TOKENS - dump_token (); + dump_token (lexer); #endif return; } else if (!lex_get_line (lexer)) { - lexer->eof = true; + lexer->prog = NULL; lexer->token = T_STOP; #if DUMP_TOKENS - dump_token (); + dump_token (lexer); #endif return; } @@ -202,7 +205,7 @@ lex_get (struct lexer *lexer) { restore_token (lexer); #if DUMP_TOKENS - dump_token (); + dump_token (lexer); #endif return; } @@ -379,7 +382,7 @@ lex_get (struct lexer *lexer) } #if DUMP_TOKENS - dump_token (); + dump_token (lexer); #endif } @@ -389,12 +392,17 @@ lex_get (struct lexer *lexer) static int parse_id (struct lexer *lexer) { - const char *start = lexer->prog; - lexer->prog = lex_skip_identifier (start); - - ds_put_substring (&lexer->tokstr, ss_buffer (start, lexer->prog - start)); + struct substring rest_of_line + = ss_substr (ds_ss (&lexer->line_buffer), + ds_pointer_to_position (&lexer->line_buffer, lexer->prog), + SIZE_MAX); + struct substring id = ss_head (rest_of_line, + lex_id_get_length (rest_of_line)); + lexer->prog += ss_length (id); + + ds_assign_substring (&lexer->tokstr, id); str_copy_trunc (lexer->tokid, sizeof lexer->tokid, ds_cstr (&lexer->tokstr)); - return lex_id_to_token (ds_cstr (&lexer->tokstr), ds_length (&lexer->tokstr)); + return lex_id_to_token (id); } /* Reports an error to the effect that subcommand SBC may only be @@ -522,7 +530,8 @@ lex_match (struct lexer *lexer, int t) bool lex_match_id (struct lexer *lexer, const char *s) { - if (lexer->token == T_ID && lex_id_match (s, lexer->tokid)) + if (lexer->token == T_ID + && lex_id_match (ss_cstr (s), ss_cstr (lexer->tokid))) { lex_get (lexer); return true; @@ -553,11 +562,8 @@ lex_match_int (struct lexer *lexer, int x) bool lex_force_match_id (struct lexer *lexer, const char *s) { - if (lexer->token == T_ID && lex_id_match (s, lexer->tokid)) - { - lex_get (lexer); - return true; - } + if (lex_match_id (lexer, s)) + return true; else { lex_error (lexer, _("expecting `%s'"), s); @@ -650,7 +656,7 @@ lex_look_ahead (struct lexer *lexer) for (;;) { - if (lexer->eof) + if (NULL == lexer->prog && ! lex_get_line (lexer) ) return 0; for (;;) @@ -693,7 +699,7 @@ lex_put_back (struct lexer *lexer, int t) void lex_put_back_id (struct lexer *lexer, const char *id) { - assert (lex_id_to_token (id, strlen (id)) == T_ID); + assert (lex_id_to_token (ss_cstr (id)) == T_ID); save_token (lexer); lexer->token = T_ID; ds_assign_cstr (&lexer->tokstr, id); @@ -749,7 +755,7 @@ lex_discard_line (struct lexer *lexer) void lex_discard_rest_of_command (struct lexer *lexer) { - if (!getl_is_interactive ()) + if (!getl_is_interactive (lexer->ss)) { while (lexer->token != T_STOP && lexer->token != '.') lex_get (lexer); @@ -808,12 +814,40 @@ strip_comments (struct string *string) } } -/* Reads a line, without performing any preprocessing */ +/* Prepares LINE, which is subject to the given SYNTAX rules, for + tokenization by stripping comments and determining whether it + is the beginning or end of a command and storing into + *LINE_STARTS_COMMAND and *LINE_ENDS_COMMAND appropriately. */ +void +lex_preprocess_line (struct string *line, + enum getl_syntax syntax, + bool *line_starts_command, + bool *line_ends_command) +{ + strip_comments (line); + ds_rtrim (line, ss_cstr (CC_SPACES)); + *line_ends_command = (ds_chomp (line, get_endcmd ()) + || (ds_is_empty (line) && get_nulline ())); + *line_starts_command = false; + if (syntax == GETL_BATCH) + { + int first = ds_first (line); + *line_starts_command = !isspace (first); + if (first == '+' || first == '-') + *ds_data (line) = ' '; + } +} + +/* Reads a line, without performing any preprocessing. + Sets *SYNTAX, if SYNTAX is non-null, to the line's syntax + mode. */ bool -lex_get_line_raw (struct lexer *lexer) +lex_get_line_raw (struct lexer *lexer, enum getl_syntax *syntax) { - bool dummy; - return lexer->read_line (&lexer->line_buffer, &dummy); + enum getl_syntax dummy; + bool ok = getl_read_line (lexer->ss, &lexer->line_buffer, + syntax != NULL ? syntax : &dummy); + return ok; } /* Reads a line for use by the tokenizer, and preprocesses it by @@ -822,53 +856,43 @@ lex_get_line_raw (struct lexer *lexer) bool lex_get_line (struct lexer *lexer) { - struct string *line = &lexer->line_buffer; - bool interactive; + bool line_starts_command; + enum getl_syntax syntax; - if (!lexer->read_line (line, &interactive)) - return false; - - strip_comments (line); - ds_rtrim (line, ss_cstr (CC_SPACES)); - - /* Check for and remove terminal dot. */ - lexer->dot = (ds_chomp (line, get_endcmd ()) - || (ds_is_empty (line) && get_nulline ())); - - /* Strip leading indentors or insert a terminal dot (unless the - line was obtained interactively). */ - if (!interactive) + if (!lex_get_line_raw (lexer, &syntax)) { - int first = ds_first (line); - - if (first == '+' || first == '-') - *ds_data (line) = ' '; - else if (first != EOF && !isspace (first)) - lexer->put_token = '.'; + lexer->prog = NULL; + return false; } - lexer->prog = ds_cstr (line); + lex_preprocess_line (&lexer->line_buffer, syntax, + &line_starts_command, &lexer->dot); + if (line_starts_command) + lexer->put_token = '.'; + + lexer->prog = ds_cstr (&lexer->line_buffer); return true; } /* Token names. */ -/* Returns the name of a token in a static buffer. */ +/* Returns the name of a token. */ const char * lex_token_name (int token) { - if (token >= T_FIRST_KEYWORD && token <= T_LAST_KEYWORD) - return keywords[token - T_FIRST_KEYWORD]; - - if (token < 256) + if (lex_is_keyword (token)) + return lex_id_name (token); + else if (token < 256) { - static char t[2]; - t[0] = token; - return t; + static char t[256][2]; + char *s = t[token]; + s[0] = token; + s[1] = '\0'; + return s; } - - NOT_REACHED (); + else + NOT_REACHED (); } /* Returns an ASCII representation of the current token as a @@ -934,15 +958,7 @@ lex_token_representation (struct lexer *lexer) return xstrdup ("**"); default: - if (lexer->token >= T_FIRST_KEYWORD && lexer->token <= T_LAST_KEYWORD) - return xstrdup (keywords [lexer->token - T_FIRST_KEYWORD]); - else - { - token_rep = xmalloc (2); - token_rep[0] = lexer->token; - token_rep[1] = '\0'; - return token_rep; - } + return xstrdup (lex_token_name (lexer->token)); } NOT_REACHED (); @@ -967,13 +983,6 @@ lex_negative_to_dash (struct lexer *lexer) } } -/* We're not at eof any more. */ -void -lex_reset_eof (struct lexer *lexer) -{ - lexer->eof = false; -} - /* Skip a COMMENT command. */ void lex_skip_comment (struct lexer *lexer) @@ -983,7 +992,7 @@ lex_skip_comment (struct lexer *lexer) if (!lex_get_line (lexer)) { lexer->put_token = T_STOP; - lexer->eof = true; + lexer->prog = NULL; return; } @@ -1038,7 +1047,7 @@ convert_numeric_string_to_char_string (struct lexer *lexer, if (ds_length (&lexer->tokstr) % chars_per_byte) msg (SE, _("String of %s digits has %d characters, which is not a " "multiple of %d."), - base_name, ds_length (&lexer->tokstr), chars_per_byte); + base_name, (int) ds_length (&lexer->tokstr), chars_per_byte); p = ds_cstr (&lexer->tokstr); for (i = 0; i < byte_cnt; i++) @@ -1117,7 +1126,7 @@ parse_string (struct lexer *lexer, enum string_type type) lexer->prog++; /* Skip whitespace after final quote mark. */ - if (lexer->eof) + if (lexer->prog == NULL) break; for (;;) { @@ -1139,7 +1148,7 @@ parse_string (struct lexer *lexer, enum string_type type) lexer->prog++; /* Skip whitespace after plus sign. */ - if (lexer->eof) + if (lexer->prog == NULL) break; for (;;) { @@ -1175,7 +1184,7 @@ finish: if (ds_length (&lexer->tokstr) > 255) { msg (SE, _("String exceeds 255 characters in length (%d characters)."), - ds_length (&lexer->tokstr)); + (int) ds_length (&lexer->tokstr)); ds_truncate (&lexer->tokstr, 255); } @@ -1192,7 +1201,8 @@ dump_token (struct lexer *lexer) const char *curfn; int curln; - getl_location (&curfn, &curln); + curln = getl_source_location (lexer->ss); + curfn = getl_source_name (lexer->ss); if (curfn) fprintf (stderr, "%s:%d\t", curfn, curln); } @@ -1225,8 +1235,8 @@ dump_token (struct lexer *lexer) break; default: - if (lexer->token >= T_FIRST_KEYWORD && lexer->token <= T_LAST_KEYWORD) - fprintf (stderr, "KEYWORD\t%s\n", lex_token_name (token)); + if (lex_is_keyword (lexer->token)) + fprintf (stderr, "KEYWORD\t%s\n", lex_token_name (lexer->token)); else fprintf (stderr, "PUNCT\t%c\n", lexer->token); break;