X-Git-Url: https://pintos-os.org/cgi-bin/gitweb.cgi?a=blobdiff_plain;f=src%2Flexer.c;h=cc2f8ca8e54e35407569b3a26bcb62923846fbee;hb=e210b20bf6f405637c8c03dd280b5a4a627191b8;hp=edcc5a2ae381498d901ae1589c4daf6292715d2f;hpb=3a7fba81ceae5b049d0f7d671e9e3c3c43bbf703;p=pspp-builds.git

diff --git a/src/lexer.c b/src/lexer.c
index edcc5a2a..cc2f8ca8 100644
--- a/src/lexer.c
+++ b/src/lexer.c
@@ -14,12 +14,12 @@
 
    You should have received a copy of the GNU General Public License
    along with this program; if not, write to the Free Software
-   Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA
-   02111-1307, USA. */
+   Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
+   02110-1301, USA. */
 
 #include <config.h>
 #include "lexer.h"
-#include <assert.h>
+#include "error.h"
 #include <ctype.h>
 #include <errno.h>
 #include <limits.h>
@@ -29,55 +29,56 @@
 #include "alloc.h"
 #include "command.h"
 #include "error.h"
-#include "getline.h"
+#include "getl.h"
 #include "magic.h"
 #include "settings.h"
 #include "str.h"
 
-/*#define DUMP_TOKENS 1*/
+#include "gettext.h"
+#define _(msgid) gettext (msgid)
+#define N_(msgid) msgid
+
+/*
+#define DUMP_TOKENS 1
+*/
 
 
 /* Global variables. */
 
+extern const char *keywords[T_N_KEYWORDS + 1];
+
+
 /* Current token. */
 int token;
 
-/* T_NUM: the token's value. */
+/* T_POS_NUM, T_NEG_NUM: the token's value. */
 double tokval;
 
 /* T_ID: the identifier. */
-char tokid[9];
+char tokid[LONG_NAME_LEN + 1];
 
 /* T_ID, T_STRING: token string value.
-   For T_ID, this is not truncated to 8 characters as is tokid. */
+   For T_ID, this is not truncated as is tokid. */
 struct string tokstr;
 
 /* Static variables. */
 
-/* Table of keywords. */
-static const char *keywords[T_N_KEYWORDS + 1] = 
-  {
-    "AND", "OR", "NOT",
-    "EQ", "GE", "GT", "LE", "LT", "NE",
-    "ALL", "BY", "TO", "WITH",
-    NULL,
-  };
-
 /* Pointer to next token in getl_buf. */
 static char *prog;
 
 /* Nonzero only if this line ends with a terminal dot. */
 static int dot;
 
-/* Nonzero only if the last token returned was T_EOF. */
+/* Nonzero only if the last token returned was T_STOP. */
 static int eof;
 
 /* If nonzero, next token returned by lex_get().
    Used only in exceptional circumstances. */
-static int put;			
+static int put_token;
+static struct string put_tokstr;
+static double put_tokval;
 
 static void unexpected_eof (void);
-static inline int check_id (const char *id, size_t len);
 static void convert_numeric_string_to_char_string (int type);
 static int parse_string (int type);
 
@@ -91,22 +92,54 @@ static void dump_token (void);
 void
 lex_init (void)
 {
+  ds_init (&tokstr, 64);
+  ds_init (&put_tokstr, 64);
   if (!lex_get_line ())
     unexpected_eof ();
 }
+
+void
+lex_done (void)
+{
+  ds_destroy (&put_tokstr);
+  ds_destroy (&tokstr);
+}
+
 
 /* Common functions. */
 
+/* Copies put_token, put_tokstr, put_tokval into token, tokstr,
+   tokval, respectively, refreshes tokid, and clears put_token. */
+static void
+restore_token (void) 
+{
+  assert (put_token != 0);
+  token = put_token;
+  ds_replace (&tokstr, ds_c_str (&put_tokstr));
+  str_copy_trunc (tokid, sizeof tokid, ds_c_str (&tokstr));
+  tokval = put_tokval;
+  put_token = 0;
+}
+
+/* Copies token, tokstr, tokval into put_token, put_tokstr,
+   put_tokval respectively. */
+static void
+save_token (void) 
+{
+  put_token = token;
+  ds_replace (&put_tokstr, ds_c_str (&tokstr));
+  put_tokval = tokval;
+}
+
 /* Parses a single token, setting appropriate global variables to
    indicate the token's attributes. */
 void
 lex_get (void)
 {
   /* If a token was pushed ahead, return it. */
-  if (put)
+  if (put_token)
     {
-      token = put;
-      put = 0;
+      restore_token ();
 #if DUMP_TOKENS
       dump_token ();
 #endif
@@ -148,10 +181,9 @@ lex_get (void)
 	      return;
 	    }
 
-	  if (put)
+	  if (put_token)
 	    {
-	      token = put;
-	      put = 0;
+              restore_token ();
 #if DUMP_TOKENS
 	      dump_token ();
 #endif
@@ -159,6 +191,7 @@ lex_get (void)
 	    }
 	}
 
+
       /* Actually parse the token. */
       cp = prog;
       ds_clear (&tokstr);
@@ -180,7 +213,7 @@ lex_get (void)
 	       negative numbers into two tokens. */
 	    if (*cp == '-')
 	      {
-		ds_putchar (&tokstr, *prog++);
+		ds_putc (&tokstr, *prog++);
 		while (isspace ((unsigned char) *prog))
 		  prog++;
 
@@ -189,39 +222,41 @@ lex_get (void)
 		    token = '-';
 		    break;
 		  }
+                token = T_NEG_NUM;
 	      }
-
+            else 
+              token = T_POS_NUM;
+                
 	    /* Parse the number, copying it into tokstr. */
 	    while (isdigit ((unsigned char) *prog))
-	      ds_putchar (&tokstr, *prog++);
+	      ds_putc (&tokstr, *prog++);
 	    if (*prog == '.')
 	      {
-		ds_putchar (&tokstr, *prog++);
+		ds_putc (&tokstr, *prog++);
 		while (isdigit ((unsigned char) *prog))
-		  ds_putchar (&tokstr, *prog++);
+		  ds_putc (&tokstr, *prog++);
 	      }
 	    if (*prog == 'e' || *prog == 'E')
 	      {
-		ds_putchar (&tokstr, *prog++);
+		ds_putc (&tokstr, *prog++);
 		if (*prog == '+' || *prog == '-')
-		  ds_putchar (&tokstr, *prog++);
+		  ds_putc (&tokstr, *prog++);
 		while (isdigit ((unsigned char) *prog))
-		  ds_putchar (&tokstr, *prog++);
+		  ds_putc (&tokstr, *prog++);
 	      }
 
 	    /* Parse as floating point. */
-	    tokval = strtod (ds_value (&tokstr), &tail);
+	    tokval = strtod (ds_c_str (&tokstr), &tail);
 	    if (*tail)
 	      {
 		msg (SE, _("%s does not form a valid number."),
-		     ds_value (&tokstr));
+		     ds_c_str (&tokstr));
 		tokval = 0.0;
 
 		ds_clear (&tokstr);
-		ds_putchar (&tokstr, '0');
+		ds_putc (&tokstr, '0');
 	      }
 
-	    token = T_NUM;
 	    break;
 	  }
 
@@ -318,15 +353,15 @@ lex_get (void)
 	    }
 
 	  /* Copy id to tokstr. */
-	  ds_putchar (&tokstr, toupper ((unsigned char) *prog++));
+	  ds_putc (&tokstr, *prog++);
 	  while (CHAR_IS_IDN (*prog))
-	    ds_putchar (&tokstr, toupper ((unsigned char) *prog++));
+	    ds_putc (&tokstr, *prog++);
 
-	  /* Copy tokstr to tokid, truncating it to 8 characters. */
-	  strncpy (tokid, ds_value (&tokstr), 8);
-	  tokid[8] = 0;
+	  /* Copy tokstr to tokid, possibly truncating it. */
+	  str_copy_trunc (tokid, sizeof tokid, ds_c_str (&tokstr));
 
-	  token = check_id (ds_value (&tokstr), ds_length (&tokstr));
+          /* Determine token type. */
+	  token = lex_id_to_token (ds_c_str (&tokstr), ds_length (&tokstr));
 	  break;
 
 	default:
@@ -345,17 +380,40 @@ lex_get (void)
 #endif
 }
 
+/* Reports an error to the effect that subcommand SBC may only be
+   specified once. */
+void
+lex_sbc_only_once (const char *sbc) 
+{
+  msg (SE, _("Subcommand %s may only be specified once."), sbc);
+}
+
+/* Reports an error to the effect that subcommand SBC is
+   missing. */
+void
+lex_sbc_missing (const char *sbc) 
+{
+  lex_error (_("missing required subcommand %s"), sbc);
+}
+
 /* Prints a syntax error message containing the current token and
    given message MESSAGE (if non-null). */
 void
 lex_error (const char *message, ...)
 {
   char *token_rep;
+  char where[128];
 
   token_rep = lex_token_representation ();
-  if (token_rep[0] == 0)
-    msg (SE, _("Syntax error at end of file."));
-  else if (message)
+  if (token == T_STOP)
+    snprintf (where, sizeof where, "%s", _("end of file"));
+  else if (token == '.')
+    snprintf (where, sizeof where, "%s", _("end of command"));
+  else
+    snprintf (where, sizeof where, "`%s'", token_rep);
+  free (token_rep);
+
+  if (message)
     {
       char buf[1024];
       va_list args;
@@ -364,12 +422,10 @@ lex_error (const char *message, ...)
       vsnprintf (buf, 1024, message, args);
       va_end (args);
 
-      msg (SE, _("Syntax error %s at `%s'."), buf, token_rep);
+      msg (SE, _("Syntax error %s at %s."), buf, where);
     }
   else
-    msg (SE, _("Syntax error at `%s'."), token_rep);
-  
-  free (token_rep);
+    msg (SE, _("Syntax error at %s."), where);
 }
 
 /* Checks that we're at end of command.
@@ -390,11 +446,27 @@ lex_end_of_command (void)
 
 /* Token testing functions. */
 
-/* Returns nonzero if the current token is an integer. */
-int
-lex_integer_p (void)
+/* Returns true if the current token is a number. */
+bool
+lex_is_number (void) 
 {
-  return (token == T_NUM
+  return token == T_POS_NUM || token == T_NEG_NUM;
+}
+
+/* Returns the value of the current token, which must be a
+   floating point number. */
+double
+lex_number (void)
+{
+  assert (lex_is_number ());
+  return tokval;
+}
+
+/* Returns true iff the current token is an integer. */
+bool
+lex_is_integer (void)
+{
+  return (lex_is_number ()
 	  && tokval != NOT_LONG
 	  && tokval >= LONG_MIN
 	  && tokval <= LONG_MAX
@@ -406,7 +478,7 @@ lex_integer_p (void)
 long
 lex_integer (void)
 {
-  assert (lex_integer_p ());
+  assert (lex_is_integer ());
   return tokval;
 }
   
@@ -427,7 +499,8 @@ lex_match (int t)
 }
 
 /* If the current token is the identifier S, skips it and returns
-   nonzero.
+   nonzero.  The identifier may be abbreviated to its first three
+   letters.
    Otherwise, returns zero. */
 int
 lex_match_id (const char *s)
@@ -446,7 +519,7 @@ lex_match_id (const char *s)
 int
 lex_match_int (int x)
 {
-  if (lex_integer_p () && lex_integer () == x)
+  if (lex_is_integer () && lex_integer () == x)
     {
       lex_get ();
       return 1;
@@ -487,7 +560,7 @@ lex_force_match (int t)
     }
   else
     {
-      lex_error (_("expecting %s"), lex_token_name (t));
+      lex_error (_("expecting `%s'"), lex_token_name (t));
       return 0;
     }
 }
@@ -511,7 +584,7 @@ lex_force_string (void)
 int
 lex_force_int (void)
 {
-  if (lex_integer_p ())
+  if (lex_is_integer ())
     return 1;
   else
     {
@@ -525,7 +598,7 @@ lex_force_int (void)
 int
 lex_force_num (void)
 {
-  if (token == T_NUM)
+  if (lex_is_number ())
     return 1;
   else
     {
@@ -547,43 +620,6 @@ lex_force_id (void)
       return 0;
     }
 }
-
-/* Comparing identifiers. */
-
-/* Keywords match if one of the following is true: KW and TOK are
-   identical (barring differences in case), or TOK is at least 3
-   characters long and those characters are identical to KW.  KW_LEN
-   is the length of KW, TOK_LEN is the length of TOK. */
-int
-lex_id_match_len (const char *kw, size_t kw_len,
-		  const char *tok, size_t tok_len)
-{
-  size_t i = 0;
-
-  assert (kw && tok);
-  for (;;)
-    {
-      if (i == kw_len && i == tok_len)
-	return 1;
-      else if (i == tok_len)
-	return i >= 3;
-      else if (i == kw_len)
-	return 0;
-      else if (toupper ((unsigned char) kw[i])
-	       != toupper ((unsigned char) tok[i]))
-	return 0;
-
-      i++;
-    }
-}
-
-/* Same as lex_id_match_len() minus the need to pass in the lengths. */
-int
-lex_id_match (const char *kw, const char *tok)
-{
-  return lex_id_match_len (kw, strlen (kw), tok, strlen (tok));
-}
-
 /* Weird token functions. */
 
 /* Returns the first character of the next token, except that if the
@@ -595,8 +631,8 @@ lex_id_match (const char *kw, const char *tok)
 int
 lex_look_ahead (void)
 {
-  if (put)
-    return put;
+  if (put_token)
+    return put_token;
 
   for (;;)
     {
@@ -615,8 +651,8 @@ lex_look_ahead (void)
 	  else if (!lex_get_line ())
 	    unexpected_eof ();
 
-	  if (put)
-	    return put;
+	  if (put_token) 
+	    return put_token;
 	}
 
       if ((toupper ((unsigned char) *prog) == 'X'
@@ -633,45 +669,41 @@ lex_look_ahead (void)
 void
 lex_put_back (int t)
 {
-  put = token;
+  save_token ();
   token = t;
 }
 
-/* Makes T the next token read. */
+/* Makes the current token become the next token to be read; the
+   current token is set to the identifier ID. */
 void
-lex_put_forward (int t)
+lex_put_back_id (const char *id)
 {
-  put = t;
+  assert (lex_id_to_token (id, strlen (id)) == T_ID);
+  save_token ();
+  token = T_ID;
+  ds_replace (&tokstr, id);
+  str_copy_trunc (tokid, sizeof tokid, ds_c_str (&tokstr));
 }
 
 /* Weird line processing functions. */
 
-/* Discards the rest of the current input line for tokenization
-   purposes, but returns the entire contents of the line for use by
-   the caller. */
-char *
+/* Returns the entire contents of the current line. */
+const char *
 lex_entire_line (void)
 {
-  prog = ds_end (&getl_buf);
-  dot = 0;
-  return ds_value (&getl_buf);
+  return ds_c_str (&getl_buf);
 }
 
 /* As lex_entire_line(), but only returns the part of the current line
    that hasn't already been tokenized.
-   If HAD_DOT is non-null, stores nonzero into *HAD_DOT if the line
+   If END_DOT is non-null, stores nonzero into *END_DOT if the line
    ends with a terminal dot, or zero if it doesn't. */
-char *
-lex_rest_of_line (int *had_dot)
+const char *
+lex_rest_of_line (int *end_dot)
 {
-  char *s = prog;
-  prog = ds_end (&getl_buf);
-
-  if (had_dot)
-    *had_dot = dot;
-  dot = 0;
-
-  return s;
+  if (end_dot)
+    *end_dot = dot;
+  return prog;
 }
 
 /* Causes the rest of the current input line to be ignored for
@@ -679,11 +711,8 @@ lex_rest_of_line (int *had_dot)
 void
 lex_discard_line (void)
 {
-  msg (SW, _("The rest of this command has been discarded."));
-
-  ds_clear (&getl_buf);
-  prog = ds_value (&getl_buf);
-  dot = put = 0;
+  prog = ds_end (&getl_buf);
+  dot = put_token = 0;
 }
 
 /* Sets the current position in the current line to P, which must be
@@ -724,9 +753,9 @@ lex_preprocess_line (void)
     int quote;
 
     /* Remove C-style comments begun by slash-star and terminated by
-     star-slash or newline. */
+       star-slash or newline. */
     quote = comment = 0;
-    for (cp = ds_value (&getl_buf); *cp; )
+    for (cp = ds_c_str (&getl_buf); *cp; )
       {
 	/* If we're not commented out, toggle quoting. */
 	if (!comment)
@@ -767,19 +796,19 @@ lex_preprocess_line (void)
   /* Strip trailing whitespace and terminal dot. */
   {
     size_t len = ds_length (&getl_buf);
-    char *s = ds_value (&getl_buf);
+    char *s = ds_c_str (&getl_buf);
     
     /* Strip trailing whitespace. */
     while (len > 0 && isspace ((unsigned char) s[len - 1]))
       len--;
 
     /* Check for and remove terminal dot. */
-    if (len > 0 && s[len - 1] == set_endcmd)
+    if (len > 0 && s[len - 1] == get_endcmd ())
       {
 	dot = 1;
 	len--;
       }
-    else if (len == 0 && set_nullline)
+    else if (len == 0 && get_nulline ())
       dot = 1;
     else
       dot = 0;
@@ -792,15 +821,15 @@ lex_preprocess_line (void)
      as necessary. */
   if (getl_interactive != 2 && getl_mode == GETL_MODE_BATCH)
     {
-      char *s = ds_value (&getl_buf);
+      char *s = ds_c_str (&getl_buf);
       
       if (s[0] == '+' || s[0] == '-' || s[0] == '.')
 	s[0] = ' ';
       else if (s[0] && !isspace ((unsigned char) s[0]))
-	lex_put_forward ('.');
+	put_token = '.';
     }
 
-  prog = ds_value (&getl_buf);
+  prog = ds_c_str (&getl_buf);
 }
 
 /* Token names. */
@@ -832,8 +861,9 @@ lex_token_representation (void)
   switch (token)
     {
     case T_ID:
-    case T_NUM:
-      return xstrdup (ds_value (&tokstr));
+    case T_POS_NUM:
+    case T_NEG_NUM:
+      return xstrdup (ds_c_str (&tokstr));
       break;
 
     case T_STRING:
@@ -841,7 +871,7 @@ lex_token_representation (void)
 	int hexstring = 0;
 	char *sp, *dp;
 
-	for (sp = ds_value (&tokstr); sp < ds_end (&tokstr); sp++)
+	for (sp = ds_c_str (&tokstr); sp < ds_end (&tokstr); sp++)
 	  if (!isprint ((unsigned char) *sp))
 	    {
 	      hexstring = 1;
@@ -856,14 +886,14 @@ lex_token_representation (void)
 	*dp++ = '\'';
 
 	if (!hexstring)
-	  for (sp = ds_value (&tokstr); *sp; )
+	  for (sp = ds_c_str (&tokstr); *sp; )
 	    {
 	      if (*sp == '\'')
 		*dp++ = '\'';
 	      *dp++ = (unsigned char) *sp++;
 	    }
 	else
-	  for (sp = ds_value (&tokstr); sp < ds_end (&tokstr); sp++)
+	  for (sp = ds_c_str (&tokstr); sp < ds_end (&tokstr); sp++)
 	    {
 	      *dp++ = (((unsigned char) *sp) >> 4)["0123456789ABCDEF"];
 	      *dp++ = (((unsigned char) *sp) & 15)["0123456789ABCDEF"];
@@ -907,12 +937,13 @@ lex_token_representation (void)
 void
 lex_negative_to_dash (void)
 {
-  if (token == T_NUM && tokval < 0.0)
+  if (token == T_NEG_NUM)
     {
-      token = '-';
+      token = T_POS_NUM;
       tokval = -tokval;
-      ds_replace (&tokstr, ds_value (&tokstr) + 1);
-      lex_put_forward (T_NUM);
+      ds_replace (&tokstr, ds_c_str (&tokstr) + 1);
+      save_token ();
+      token = '-';
     }
 }
    
@@ -929,8 +960,14 @@ lex_skip_comment (void)
 {
   for (;;)
     {
-      lex_get_line ();
-      if (put == '.')
+      if (!lex_get_line ()) 
+        {
+          put_token = T_STOP;
+          eof = 1;
+          return;
+        }
+      
+      if (put_token == '.')
 	break;
 
       prog = ds_end (&getl_buf);
@@ -948,23 +985,6 @@ unexpected_eof (void)
   msg (FE, _("Unexpected end of file."));
 }
 
-/* Returns the proper token type, either T_ID or a reserved keyword
-   enum, for ID[], which must contain LEN characters. */
-static inline int
-check_id (const char *id, size_t len)
-{
-  const char **kwp;
-
-  if (len < 2 || len > 4)
-    return T_ID;
-  
-  for (kwp = keywords; *kwp; kwp++)
-    if (!strcmp (*kwp, id))
-      return T_FIRST_KEYWORD + (kwp - keywords);
-
-  return T_ID;
-}
-
 /* When invoked, tokstr contains a string of binary, octal, or hex
    digits, for values of TYPE of 0, 1, or 2, respectively.  The string
    is converted to characters having the specified values. */
@@ -989,7 +1009,7 @@ convert_numeric_string_to_char_string (int type)
 	       "multiple of %d."),
 	 gettext (base_name), ds_length (&tokstr), cpb);
 
-  p = ds_value (&tokstr);
+  p = ds_c_str (&tokstr);
   for (i = 0; i < nb; i++)
     {
       int value;
@@ -1019,7 +1039,7 @@ convert_numeric_string_to_char_string (int type)
 	  value = value * base + v;
 	}
 
-      ds_value (&tokstr)[i] = (unsigned char) value;
+      ds_c_str (&tokstr)[i] = (unsigned char) value;
     }
 
   ds_truncate (&tokstr, nb);
@@ -1058,7 +1078,7 @@ parse_string (int type)
 		break;
 	    }
 
-	  ds_putchar (&tokstr, *prog++);
+	  ds_putc (&tokstr, *prog++);
 	}
       prog++;
 
@@ -1128,7 +1148,7 @@ finish:
     int warned = 0;
 
     for (i = 0; i < ds_length (&tokstr); i++)
-      if (ds_value (&tokstr)[i] == 0)
+      if (ds_c_str (&tokstr)[i] == 0)
 	{
 	  if (!warned)
 	    {
@@ -1136,7 +1156,7 @@ finish:
 			 "characters.  Replacing with spaces."));
 	      warned = 1;
 	    }
-	  ds_value (&tokstr)[i] = ' ';
+	  ds_c_str (&tokstr)[i] = ' ';
 	}
   }
 
@@ -1155,41 +1175,42 @@ dump_token (void)
 
     getl_location (&curfn, &curln);
     if (curfn)
-      printf ("%s:%d\t", curfn, curln);
+      fprintf (stderr, "%s:%d\t", curfn, curln);
   }
   
   switch (token)
     {
     case T_ID:
-      printf ("ID\t%s\n", tokid);
+      fprintf (stderr, "ID\t%s\n", tokid);
       break;
 
-    case T_NUM:
-      printf ("NUM\t%f\n", tokval);
+    case T_POS_NUM:
+    case T_NEG_NUM:
+      fprintf (stderr, "NUM\t%f\n", tokval);
       break;
 
     case T_STRING:
-      printf ("STRING\t\"%s\"\n", ds_value (&tokstr));
+      fprintf (stderr, "STRING\t\"%s\"\n", ds_c_str (&tokstr));
       break;
 
     case T_STOP:
-      printf ("STOP\n");
+      fprintf (stderr, "STOP\n");
       break;
 
     case T_EXP:
-      puts ("MISC\tEXP");
+      fprintf (stderr, "MISC\tEXP\n");
       break;
 
     case 0:
-      puts ("MISC\tEOF");
+      fprintf (stderr, "MISC\tEOF\n");
       break;
 
     default:
       if (token >= T_FIRST_KEYWORD && token <= T_LAST_KEYWORD)
-	printf ("KEYWORD\t%s\n", lex_token_name (token));
+	fprintf (stderr, "KEYWORD\t%s\n", lex_token_name (token));
       else
-	printf ("PUNCT\t%c\n", token);
+	fprintf (stderr, "PUNCT\t%c\n", token);
       break;
     }
 }
-#endif /* DEBUGGING */
+#endif /* DUMP_TOKENS */