lexer: Add tokens for '{', '}', ':', ';' for use in the matrix language.

author Ben Pfaff <blp@cs.stanford.edu>

Mon, 6 Dec 2021 05:14:08 +0000 (21:14 -0800)

committer Ben Pfaff <blp@cs.stanford.edu>

Mon, 6 Dec 2021 17:05:31 +0000 (09:05 -0800)
author Ben Pfaff <blp@cs.stanford.edu>
Mon, 6 Dec 2021 05:14:08 +0000 (21:14 -0800)
committer Ben Pfaff <blp@cs.stanford.edu>
Mon, 6 Dec 2021 17:05:31 +0000 (09:05 -0800)
diff --git a/src/data/identifier.c b/src/data/identifier.c

index 07c7675bd5a3000cf3742c85505bf68b2174ebd3..baa0abd1c57528ace8b76c73ebc93717dd73d821 100644 (file)
--- a/src/data/identifier.c
+++ b/src/data/identifier.c
@@ -99,9 +99,21 @@ token_type_to_string (enum token_type token)
      case T_RBRACK:
        return "]";
  
+    case T_LCURLY:
+      return "{";
+
+    case T_RCURLY:
+      return "}";
+
      case T_COMMA:
        return ",";
  
+    case T_SEMICOLON:
+      return ";";
+
+    case T_COLON:
+      return ":";
+
      case T_AND:
        return "AND";
  
diff --git a/src/data/identifier.h b/src/data/identifier.h

index d694a5201d815997d4ca53126875320bdfad686e..dcbce970cda97168ab96a22c3e51660d33efc763 100644 (file)
--- a/src/data/identifier.h
+++ b/src/data/identifier.h
@@ -41,7 +41,11 @@
      TOKEN_TYPE(RPAREN)              /* ) */                             \
      TOKEN_TYPE(LBRACK)              /* [ */                             \
      TOKEN_TYPE(RBRACK)              /* ] */                             \
+    TOKEN_TYPE(LCURLY)              /* { */                             \
+    TOKEN_TYPE(RCURLY)              /* } */                             \
      TOKEN_TYPE(COMMA)               /* , */                             \
+    TOKEN_TYPE(SEMICOLON)           /* ; */                             \
+    TOKEN_TYPE(COLON)               /* : */                             \
                                                                          \
      TOKEN_TYPE(AND)                 /* AND */                           \
      TOKEN_TYPE(OR)                  /* OR */                            \
diff --git a/src/language/lexer/macro.c b/src/language/lexer/macro.c

index a089a3a7c32e1c7538097d0e291d7366d408f46b..e5805f035670b33e7b6f62aea55b58a623818ad4 100644 (file)
--- a/src/language/lexer/macro.c
+++ b/src/language/lexer/macro.c
@@ -334,6 +334,8 @@ classify_token (enum token_type type)
      case T_RPAREN:
      case T_LBRACK:
      case T_RBRACK:
+    case T_LCURLY:
+    case T_RCURLY:
        return TC_PUNCT;
  
      case T_PLUS:
@@ -341,6 +343,7 @@ classify_token (enum token_type type)
      case T_ASTERISK:
      case T_SLASH:
      case T_EQUALS:
+    case T_COLON:
      case T_AND:
      case T_OR:
      case T_NOT:
@@ -359,6 +362,7 @@ classify_token (enum token_type type)
        return TC_BINOP;
  
      case T_COMMA:
+    case T_SEMICOLON:
        return TC_COMMA;
      }
  
diff --git a/src/language/lexer/scan.c b/src/language/lexer/scan.c

index eed9f57c221b13f77af4647ccfe28870728fe658..f2dcebca15ea562543d5fde64b7cac95ec96ab4b 100644 (file)
--- a/src/language/lexer/scan.c
+++ b/src/language/lexer/scan.c
@@ -188,6 +188,8 @@ scan_punct1__ (char c0)
      case '-': return T_DASH;
      case '[': return T_LBRACK;
      case ']': return T_RBRACK;
+    case '{': return T_LCURLY;
+    case '}': return T_RCURLY;
      case '&': return T_AND;
      case '|': return T_OR;
      case '+': return T_PLUS;
@@ -196,6 +198,8 @@ scan_punct1__ (char c0)
      case '<': return T_LT;
      case '>': return T_GT;
      case '~': return T_NOT;
+    case ';': return T_SEMICOLON;
+    case ':': return T_COLON;
      default: return T_MACRO_PUNCT;
      }
  
diff --git a/src/language/lexer/segment.c b/src/language/lexer/segment.c

index 05a9f57236f8b30fc0e26931ab93beaba3b4c715..346910898ce3898b4bec6c62563a1a6f8dfe10bd 100644 (file)
--- a/src/language/lexer/segment.c
+++ b/src/language/lexer/segment.c
@@ -956,8 +956,8 @@ segmenter_parse_mid_command__ (struct segmenter *s,
              return segmenter_parse_number__ (s, input, n, eof, type, ofs);
          }
        /* Fall through. */
-    case '(': case ')': case ',': case '=':
-    case '[': case ']': case '&': case '|': case '+':
+    case '(': case ')': case '{': case ',': case '=': case ';': case ':':
+    case '[': case ']': case '}': case '&': case '|': case '+':
        *type = SEG_PUNCT;
        s->substate = 0;
        return 1;
diff --git a/tests/language/lexer/scan.at b/tests/language/lexer/scan.at

index 90dea5d34601fe7531e966144f94b20218ff8dad..c877628fdf9e43f420f4e6cd268c003d5e21d5ba 100644 (file)
--- a/tests/language/lexer/scan.at
+++ b/tests/language/lexer/scan.at
@@ -170,13 +170,13 @@ LBRACK
  RBRACK
  EXP
  MACRO_PUNCT "%"
-MACRO_PUNCT ":"
-MACRO_PUNCT ";"
+COLON
+SEMICOLON
  MACRO_PUNCT "?"
  MACRO_PUNCT "_"
  MACRO_PUNCT "`"
-MACRO_PUNCT "{"
-MACRO_PUNCT "}"
+LCURLY
+RCURLY
  NOT
  STOP
  ])
author	Ben Pfaff <blp@cs.stanford.edu>
	Mon, 6 Dec 2021 05:14:08 +0000 (21:14 -0800)
committer	Ben Pfaff <blp@cs.stanford.edu>
	Mon, 6 Dec 2021 17:05:31 +0000 (09:05 -0800)
src/data/identifier.c		patch | blob | history
src/data/identifier.h		patch | blob | history
src/language/lexer/macro.c		patch | blob | history
src/language/lexer/scan.c		patch | blob | history
src/language/lexer/segment.c		patch | blob | history
tests/language/lexer/scan.at		patch | blob | history