X-Git-Url: https://pintos-os.org/cgi-bin/gitweb.cgi?a=blobdiff_plain;f=src%2Flexer.c;h=cc2f8ca8e54e35407569b3a26bcb62923846fbee;hb=e210b20bf6f405637c8c03dd280b5a4a627191b8;hp=edcc5a2ae381498d901ae1589c4daf6292715d2f;hpb=3a7fba81ceae5b049d0f7d671e9e3c3c43bbf703;p=pspp-builds.git diff --git a/src/lexer.c b/src/lexer.c index edcc5a2a..cc2f8ca8 100644 --- a/src/lexer.c +++ b/src/lexer.c @@ -14,12 +14,12 @@ You should have received a copy of the GNU General Public License along with this program; if not, write to the Free Software - Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA - 02111-1307, USA. */ + Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA + 02110-1301, USA. */ #include #include "lexer.h" -#include +#include "error.h" #include #include #include @@ -29,55 +29,56 @@ #include "alloc.h" #include "command.h" #include "error.h" -#include "getline.h" +#include "getl.h" #include "magic.h" #include "settings.h" #include "str.h" -/*#define DUMP_TOKENS 1*/ +#include "gettext.h" +#define _(msgid) gettext (msgid) +#define N_(msgid) msgid + +/* +#define DUMP_TOKENS 1 +*/ /* Global variables. */ +extern const char *keywords[T_N_KEYWORDS + 1]; + + /* Current token. */ int token; -/* T_NUM: the token's value. */ +/* T_POS_NUM, T_NEG_NUM: the token's value. */ double tokval; /* T_ID: the identifier. */ -char tokid[9]; +char tokid[LONG_NAME_LEN + 1]; /* T_ID, T_STRING: token string value. - For T_ID, this is not truncated to 8 characters as is tokid. */ + For T_ID, this is not truncated as is tokid. */ struct string tokstr; /* Static variables. */ -/* Table of keywords. */ -static const char *keywords[T_N_KEYWORDS + 1] = - { - "AND", "OR", "NOT", - "EQ", "GE", "GT", "LE", "LT", "NE", - "ALL", "BY", "TO", "WITH", - NULL, - }; - /* Pointer to next token in getl_buf. */ static char *prog; /* Nonzero only if this line ends with a terminal dot. */ static int dot; -/* Nonzero only if the last token returned was T_EOF. */ +/* Nonzero only if the last token returned was T_STOP. */ static int eof; /* If nonzero, next token returned by lex_get(). Used only in exceptional circumstances. */ -static int put; +static int put_token; +static struct string put_tokstr; +static double put_tokval; static void unexpected_eof (void); -static inline int check_id (const char *id, size_t len); static void convert_numeric_string_to_char_string (int type); static int parse_string (int type); @@ -91,22 +92,54 @@ static void dump_token (void); void lex_init (void) { + ds_init (&tokstr, 64); + ds_init (&put_tokstr, 64); if (!lex_get_line ()) unexpected_eof (); } + +void +lex_done (void) +{ + ds_destroy (&put_tokstr); + ds_destroy (&tokstr); +} + /* Common functions. */ +/* Copies put_token, put_tokstr, put_tokval into token, tokstr, + tokval, respectively, and sets tokid appropriately. */ +static void +restore_token (void) +{ + assert (put_token != 0); + token = put_token; + ds_replace (&tokstr, ds_c_str (&put_tokstr)); + str_copy_trunc (tokid, sizeof tokid, ds_c_str (&tokstr)); + tokval = put_tokval; + put_token = 0; +} + +/* Copies token, tokstr, tokval into put_token, put_tokstr, + put_tokval respectively. */ +static void +save_token (void) +{ + put_token = token; + ds_replace (&put_tokstr, ds_c_str (&tokstr)); + put_tokval = tokval; +} + /* Parses a single token, setting appropriate global variables to indicate the token's attributes. */ void lex_get (void) { /* If a token was pushed ahead, return it. */ - if (put) + if (put_token) { - token = put; - put = 0; + restore_token (); #if DUMP_TOKENS dump_token (); #endif @@ -148,10 +181,9 @@ lex_get (void) return; } - if (put) + if (put_token) { - token = put; - put = 0; + restore_token (); #if DUMP_TOKENS dump_token (); #endif @@ -159,6 +191,7 @@ lex_get (void) } } + /* Actually parse the token. */ cp = prog; ds_clear (&tokstr); @@ -180,7 +213,7 @@ lex_get (void) negative numbers into two tokens. */ if (*cp == '-') { - ds_putchar (&tokstr, *prog++); + ds_putc (&tokstr, *prog++); while (isspace ((unsigned char) *prog)) prog++; @@ -189,39 +222,41 @@ lex_get (void) token = '-'; break; } + token = T_NEG_NUM; } - + else + token = T_POS_NUM; + /* Parse the number, copying it into tokstr. */ while (isdigit ((unsigned char) *prog)) - ds_putchar (&tokstr, *prog++); + ds_putc (&tokstr, *prog++); if (*prog == '.') { - ds_putchar (&tokstr, *prog++); + ds_putc (&tokstr, *prog++); while (isdigit ((unsigned char) *prog)) - ds_putchar (&tokstr, *prog++); + ds_putc (&tokstr, *prog++); } if (*prog == 'e' || *prog == 'E') { - ds_putchar (&tokstr, *prog++); + ds_putc (&tokstr, *prog++); if (*prog == '+' || *prog == '-') - ds_putchar (&tokstr, *prog++); + ds_putc (&tokstr, *prog++); while (isdigit ((unsigned char) *prog)) - ds_putchar (&tokstr, *prog++); + ds_putc (&tokstr, *prog++); } /* Parse as floating point. */ - tokval = strtod (ds_value (&tokstr), &tail); + tokval = strtod (ds_c_str (&tokstr), &tail); if (*tail) { msg (SE, _("%s does not form a valid number."), - ds_value (&tokstr)); + ds_c_str (&tokstr)); tokval = 0.0; ds_clear (&tokstr); - ds_putchar (&tokstr, '0'); + ds_putc (&tokstr, '0'); } - token = T_NUM; break; } @@ -318,15 +353,15 @@ lex_get (void) } /* Copy id to tokstr. */ - ds_putchar (&tokstr, toupper ((unsigned char) *prog++)); + ds_putc (&tokstr, *prog++); while (CHAR_IS_IDN (*prog)) - ds_putchar (&tokstr, toupper ((unsigned char) *prog++)); + ds_putc (&tokstr, *prog++); - /* Copy tokstr to tokid, truncating it to 8 characters. */ - strncpy (tokid, ds_value (&tokstr), 8); - tokid[8] = 0; + /* Copy tokstr to tokid, possibly truncating it.*/ + str_copy_trunc (tokid, sizeof tokid, ds_c_str (&tokstr)); - token = check_id (ds_value (&tokstr), ds_length (&tokstr)); + /* Determine token type. */ + token = lex_id_to_token (ds_c_str (&tokstr), ds_length (&tokstr)); break; default: @@ -345,17 +380,40 @@ lex_get (void) #endif } +/* Reports an error to the effect that subcommand SBC may only be + specified once. */ +void +lex_sbc_only_once (const char *sbc) +{ + msg (SE, _("Subcommand %s may only be specified once."), sbc); +} + +/* Reports an error to the effect that subcommand SBC is + missing. */ +void +lex_sbc_missing (const char *sbc) +{ + lex_error (_("missing required subcommand %s"), sbc); +} + /* Prints a syntax error message containing the current token and given message MESSAGE (if non-null). */ void lex_error (const char *message, ...) { char *token_rep; + char where[128]; token_rep = lex_token_representation (); - if (token_rep[0] == 0) - msg (SE, _("Syntax error at end of file.")); - else if (message) + if (token == T_STOP) + strcpy (where, "end of file"); + else if (token == '.') + strcpy (where, "end of command"); + else + snprintf (where, sizeof where, "`%s'", token_rep); + free (token_rep); + + if (message) { char buf[1024]; va_list args; @@ -364,12 +422,10 @@ lex_error (const char *message, ...) vsnprintf (buf, 1024, message, args); va_end (args); - msg (SE, _("Syntax error %s at `%s'."), buf, token_rep); + msg (SE, _("Syntax error %s at %s."), buf, where); } else - msg (SE, _("Syntax error at `%s'."), token_rep); - - free (token_rep); + msg (SE, _("Syntax error at %s."), where); } /* Checks that we're at end of command. @@ -390,11 +446,27 @@ lex_end_of_command (void) /* Token testing functions. */ -/* Returns nonzero if the current token is an integer. */ -int -lex_integer_p (void) +/* Returns true if the current token is a number. */ +bool +lex_is_number (void) { - return (token == T_NUM + return token == T_POS_NUM || token == T_NEG_NUM; +} + +/* Returns the value of the current token, which must be a + floating point number. */ +double +lex_number (void) +{ + assert (lex_is_number ()); + return tokval; +} + +/* Returns true iff the current token is an integer. */ +bool +lex_is_integer (void) +{ + return (lex_is_number () && tokval != NOT_LONG && tokval >= LONG_MIN && tokval <= LONG_MAX @@ -406,7 +478,7 @@ lex_integer_p (void) long lex_integer (void) { - assert (lex_integer_p ()); + assert (lex_is_integer ()); return tokval; } @@ -427,7 +499,8 @@ lex_match (int t) } /* If the current token is the identifier S, skips it and returns - nonzero. + nonzero. The identifier may be abbreviated to its first three + letters. Otherwise, returns zero. */ int lex_match_id (const char *s) @@ -446,7 +519,7 @@ lex_match_id (const char *s) int lex_match_int (int x) { - if (lex_integer_p () && lex_integer () == x) + if (lex_is_integer () && lex_integer () == x) { lex_get (); return 1; @@ -487,7 +560,7 @@ lex_force_match (int t) } else { - lex_error (_("expecting %s"), lex_token_name (t)); + lex_error (_("expecting `%s'"), lex_token_name (t)); return 0; } } @@ -511,7 +584,7 @@ lex_force_string (void) int lex_force_int (void) { - if (lex_integer_p ()) + if (lex_is_integer ()) return 1; else { @@ -525,7 +598,7 @@ lex_force_int (void) int lex_force_num (void) { - if (token == T_NUM) + if (lex_is_number ()) return 1; else { @@ -547,43 +620,6 @@ lex_force_id (void) return 0; } } - -/* Comparing identifiers. */ - -/* Keywords match if one of the following is true: KW and TOK are - identical (barring differences in case), or TOK is at least 3 - characters long and those characters are identical to KW. KW_LEN - is the length of KW, TOK_LEN is the length of TOK. */ -int -lex_id_match_len (const char *kw, size_t kw_len, - const char *tok, size_t tok_len) -{ - size_t i = 0; - - assert (kw && tok); - for (;;) - { - if (i == kw_len && i == tok_len) - return 1; - else if (i == tok_len) - return i >= 3; - else if (i == kw_len) - return 0; - else if (toupper ((unsigned char) kw[i]) - != toupper ((unsigned char) tok[i])) - return 0; - - i++; - } -} - -/* Same as lex_id_match_len() minus the need to pass in the lengths. */ -int -lex_id_match (const char *kw, const char *tok) -{ - return lex_id_match_len (kw, strlen (kw), tok, strlen (tok)); -} - /* Weird token functions. */ /* Returns the first character of the next token, except that if the @@ -595,8 +631,8 @@ lex_id_match (const char *kw, const char *tok) int lex_look_ahead (void) { - if (put) - return put; + if (put_token) + return put_token; for (;;) { @@ -615,8 +651,8 @@ lex_look_ahead (void) else if (!lex_get_line ()) unexpected_eof (); - if (put) - return put; + if (put_token) + return put_token; } if ((toupper ((unsigned char) *prog) == 'X' @@ -633,45 +669,41 @@ lex_look_ahead (void) void lex_put_back (int t) { - put = token; + save_token (); token = t; } -/* Makes T the next token read. */ +/* Makes the current token become the next token to be read; the + current token is set to the identifier ID. */ void -lex_put_forward (int t) +lex_put_back_id (const char *id) { - put = t; + assert (lex_id_to_token (id, strlen (id)) == T_ID); + save_token (); + token = T_ID; + ds_replace (&tokstr, id); + str_copy_trunc (tokid, sizeof tokid, ds_c_str (&tokstr)); } /* Weird line processing functions. */ -/* Discards the rest of the current input line for tokenization - purposes, but returns the entire contents of the line for use by - the caller. */ -char * +/* Returns the entire contents of the current line. */ +const char * lex_entire_line (void) { - prog = ds_end (&getl_buf); - dot = 0; - return ds_value (&getl_buf); + return ds_c_str (&getl_buf); } /* As lex_entire_line(), but only returns the part of the current line that hasn't already been tokenized. - If HAD_DOT is non-null, stores nonzero into *HAD_DOT if the line + If END_DOT is non-null, stores nonzero into *END_DOT if the line ends with a terminal dot, or zero if it doesn't. */ -char * -lex_rest_of_line (int *had_dot) +const char * +lex_rest_of_line (int *end_dot) { - char *s = prog; - prog = ds_end (&getl_buf); - - if (had_dot) - *had_dot = dot; - dot = 0; - - return s; + if (end_dot) + *end_dot = dot; + return prog; } /* Causes the rest of the current input line to be ignored for @@ -679,11 +711,8 @@ lex_rest_of_line (int *had_dot) void lex_discard_line (void) { - msg (SW, _("The rest of this command has been discarded.")); - - ds_clear (&getl_buf); - prog = ds_value (&getl_buf); - dot = put = 0; + prog = ds_end (&getl_buf); + dot = put_token = 0; } /* Sets the current position in the current line to P, which must be @@ -724,9 +753,9 @@ lex_preprocess_line (void) int quote; /* Remove C-style comments begun by slash-star and terminated by - star-slash or newline. */ + star-slash or newline. */ quote = comment = 0; - for (cp = ds_value (&getl_buf); *cp; ) + for (cp = ds_c_str (&getl_buf); *cp; ) { /* If we're not commented out, toggle quoting. */ if (!comment) @@ -767,19 +796,19 @@ lex_preprocess_line (void) /* Strip trailing whitespace and terminal dot. */ { size_t len = ds_length (&getl_buf); - char *s = ds_value (&getl_buf); + char *s = ds_c_str (&getl_buf); /* Strip trailing whitespace. */ while (len > 0 && isspace ((unsigned char) s[len - 1])) len--; /* Check for and remove terminal dot. */ - if (len > 0 && s[len - 1] == set_endcmd) + if (len > 0 && s[len - 1] == get_endcmd ()) { dot = 1; len--; } - else if (len == 0 && set_nullline) + else if (len == 0 && get_nulline ()) dot = 1; else dot = 0; @@ -792,15 +821,15 @@ lex_preprocess_line (void) as necessary. */ if (getl_interactive != 2 && getl_mode == GETL_MODE_BATCH) { - char *s = ds_value (&getl_buf); + char *s = ds_c_str (&getl_buf); if (s[0] == '+' || s[0] == '-' || s[0] == '.') s[0] = ' '; else if (s[0] && !isspace ((unsigned char) s[0])) - lex_put_forward ('.'); + put_token = '.'; } - prog = ds_value (&getl_buf); + prog = ds_c_str (&getl_buf); } /* Token names. */ @@ -832,8 +861,9 @@ lex_token_representation (void) switch (token) { case T_ID: - case T_NUM: - return xstrdup (ds_value (&tokstr)); + case T_POS_NUM: + case T_NEG_NUM: + return xstrdup (ds_c_str (&tokstr)); break; case T_STRING: @@ -841,7 +871,7 @@ lex_token_representation (void) int hexstring = 0; char *sp, *dp; - for (sp = ds_value (&tokstr); sp < ds_end (&tokstr); sp++) + for (sp = ds_c_str (&tokstr); sp < ds_end (&tokstr); sp++) if (!isprint ((unsigned char) *sp)) { hexstring = 1; @@ -856,14 +886,14 @@ lex_token_representation (void) *dp++ = '\''; if (!hexstring) - for (sp = ds_value (&tokstr); *sp; ) + for (sp = ds_c_str (&tokstr); *sp; ) { if (*sp == '\'') *dp++ = '\''; *dp++ = (unsigned char) *sp++; } else - for (sp = ds_value (&tokstr); sp < ds_end (&tokstr); sp++) + for (sp = ds_c_str (&tokstr); sp < ds_end (&tokstr); sp++) { *dp++ = (((unsigned char) *sp) >> 4)["0123456789ABCDEF"]; *dp++ = (((unsigned char) *sp) & 15)["0123456789ABCDEF"]; @@ -907,12 +937,13 @@ lex_token_representation (void) void lex_negative_to_dash (void) { - if (token == T_NUM && tokval < 0.0) + if (token == T_NEG_NUM) { - token = '-'; + token = T_POS_NUM; tokval = -tokval; - ds_replace (&tokstr, ds_value (&tokstr) + 1); - lex_put_forward (T_NUM); + ds_replace (&tokstr, ds_c_str (&tokstr) + 1); + save_token (); + token = '-'; } } @@ -929,8 +960,14 @@ lex_skip_comment (void) { for (;;) { - lex_get_line (); - if (put == '.') + if (!lex_get_line ()) + { + put_token = T_STOP; + eof = 1; + return; + } + + if (put_token == '.') break; prog = ds_end (&getl_buf); @@ -948,23 +985,6 @@ unexpected_eof (void) msg (FE, _("Unexpected end of file.")); } -/* Returns the proper token type, either T_ID or a reserved keyword - enum, for ID[], which must contain LEN characters. */ -static inline int -check_id (const char *id, size_t len) -{ - const char **kwp; - - if (len < 2 || len > 4) - return T_ID; - - for (kwp = keywords; *kwp; kwp++) - if (!strcmp (*kwp, id)) - return T_FIRST_KEYWORD + (kwp - keywords); - - return T_ID; -} - /* When invoked, tokstr contains a string of binary, octal, or hex digits, for values of TYPE of 0, 1, or 2, respectively. The string is converted to characters having the specified values. */ @@ -989,7 +1009,7 @@ convert_numeric_string_to_char_string (int type) "multiple of %d."), gettext (base_name), ds_length (&tokstr), cpb); - p = ds_value (&tokstr); + p = ds_c_str (&tokstr); for (i = 0; i < nb; i++) { int value; @@ -1019,7 +1039,7 @@ convert_numeric_string_to_char_string (int type) value = value * base + v; } - ds_value (&tokstr)[i] = (unsigned char) value; + ds_c_str (&tokstr)[i] = (unsigned char) value; } ds_truncate (&tokstr, nb); @@ -1058,7 +1078,7 @@ parse_string (int type) break; } - ds_putchar (&tokstr, *prog++); + ds_putc (&tokstr, *prog++); } prog++; @@ -1128,7 +1148,7 @@ finish: int warned = 0; for (i = 0; i < ds_length (&tokstr); i++) - if (ds_value (&tokstr)[i] == 0) + if (ds_c_str (&tokstr)[i] == 0) { if (!warned) { @@ -1136,7 +1156,7 @@ finish: "characters. Replacing with spaces.")); warned = 1; } - ds_value (&tokstr)[i] = ' '; + ds_c_str (&tokstr)[i] = ' '; } } @@ -1155,41 +1175,42 @@ dump_token (void) getl_location (&curfn, &curln); if (curfn) - printf ("%s:%d\t", curfn, curln); + fprintf (stderr, "%s:%d\t", curfn, curln); } switch (token) { case T_ID: - printf ("ID\t%s\n", tokid); + fprintf (stderr, "ID\t%s\n", tokid); break; - case T_NUM: - printf ("NUM\t%f\n", tokval); + case T_POS_NUM: + case T_NEG_NUM: + fprintf (stderr, "NUM\t%f\n", tokval); break; case T_STRING: - printf ("STRING\t\"%s\"\n", ds_value (&tokstr)); + fprintf (stderr, "STRING\t\"%s\"\n", ds_c_str (&tokstr)); break; case T_STOP: - printf ("STOP\n"); + fprintf (stderr, "STOP\n"); break; case T_EXP: - puts ("MISC\tEXP"); + fprintf (stderr, "MISC\tEXP\""); break; case 0: - puts ("MISC\tEOF"); + fprintf (stderr, "MISC\tEOF\n"); break; default: if (token >= T_FIRST_KEYWORD && token <= T_LAST_KEYWORD) - printf ("KEYWORD\t%s\n", lex_token_name (token)); + fprintf (stderr, "KEYWORD\t%s\n", lex_token_name (token)); else - printf ("PUNCT\t%c\n", token); + fprintf (stderr, "PUNCT\t%c\n", token); break; } } -#endif /* DEBUGGING */ +#endif /* DUMP_TOKENS */