pintos-os.org Git - pspp/blob - src/data/identifier.c

   1 /* PSPP - a program for statistical analysis.
   2    Copyright (C) 1997-9, 2000, 2005 Free Software Foundation, Inc.
   3
   4    This program is free software: you can redistribute it and/or modify
   5    it under the terms of the GNU General Public License as published by
   6    the Free Software Foundation, either version 3 of the License, or
   7    (at your option) any later version.
   8
   9    This program is distributed in the hope that it will be useful,
  10    but WITHOUT ANY WARRANTY; without even the implied warranty of
  11    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  12    GNU General Public License for more details.
  13
  14    You should have received a copy of the GNU General Public License
  15    along with this program.  If not, see <http://www.gnu.org/licenses/>. */
  16
  17 /*
  18    This file is concerned with the definition of the PSPP syntax, NOT the
  19    action of scanning/parsing code.
  20 */
  21
  22 #include <config.h>
  23 #include "identifier.h"
  24
  25
  26 #include <assert.h>
  27 #include <string.h>
  28 #include <libpspp/assertion.h>
  29
  30 /* Recognizing identifiers. */
  31
  32 /* Returns true if C may be the first character in an
  33    identifier in the current locale. */
  34 bool
  35 lex_is_id1 (char c_)
  36 {
  37   unsigned char c = c_;
  38   return isalpha (c) || c == '@' || c == '#' || c == '$';
  39 }
  40
  41
  42 /* Returns true if C may be a character in an identifier other
  43    than the first. */
  44 bool
  45 lex_is_idn (char c_)
  46 {
  47   unsigned char c = c_;
  48   return lex_is_id1 (c) || isdigit (c) || c == '.' || c == '_';
  49 }
  50
  51 /* Returns the length of the longest prefix of STRING that forms
  52    a valid identifier.  Returns zero if STRING does not begin
  53    with a valid identifier.  */
  54 size_t
  55 lex_id_get_length (struct substring string)
  56 {
  57   size_t length = 0;
  58   if (!ss_is_empty (string) && lex_is_id1 (ss_first (string)))
  59     {
  60       length = 1;
  61       while (length < ss_length (string)
  62              && lex_is_idn (ss_at (string, length)))
  63         length++;
  64     }
  65   return length;
  66 }
  67 \f
  68 /* Comparing identifiers. */
  69
  70 /* Returns true if TOKEN is a case-insensitive match for KEYWORD.
  71
  72    Keywords match if one of the following is true: KEYWORD and
  73    TOKEN are identical, or TOKEN is at least 3 characters long
  74    and those characters are identical to KEYWORD. */
  75 bool
  76 lex_id_match (struct substring keyword, struct substring token)
  77 {
  78   size_t token_len = ss_length (token);
  79   size_t keyword_len = ss_length (keyword);
  80
  81   if (token_len >= 3 && token_len < keyword_len)
  82     return ss_equals_case (ss_head (keyword, token_len), token);
  83   else
  84     return ss_equals_case (keyword, token);
  85 }
  86 \f
  87 /* Table of keywords. */
  88 struct keyword
  89   {
  90     int token;
  91     const struct substring identifier;
  92   };
  93
  94 static const struct keyword keywords[] =
  95   {
  96     { T_AND,  SS_LITERAL_INITIALIZER ("AND") },
  97     { T_OR,   SS_LITERAL_INITIALIZER ("OR") },
  98     { T_NOT,  SS_LITERAL_INITIALIZER ("NOT") },
  99     { T_EQ,   SS_LITERAL_INITIALIZER ("EQ") },
 100     { T_GE,   SS_LITERAL_INITIALIZER ("GE") },
 101     { T_GT,   SS_LITERAL_INITIALIZER ("GT") },
 102     { T_LE,   SS_LITERAL_INITIALIZER ("LE") },
 103     { T_LT,   SS_LITERAL_INITIALIZER ("LT") },
 104     { T_NE,   SS_LITERAL_INITIALIZER ("NE") },
 105     { T_ALL,  SS_LITERAL_INITIALIZER ("ALL") },
 106     { T_BY,   SS_LITERAL_INITIALIZER ("BY") },
 107     { T_TO,   SS_LITERAL_INITIALIZER ("TO") },
 108     { T_WITH, SS_LITERAL_INITIALIZER ("WITH") },
 109   };
 110 static const size_t keyword_cnt = sizeof keywords / sizeof *keywords;
 111
 112 /* Returns true if TOKEN is representable as a keyword. */
 113 bool
 114 lex_is_keyword (int token)
 115 {
 116   const struct keyword *kw;
 117   for (kw = keywords; kw < &keywords[keyword_cnt]; kw++)
 118     if (kw->token == token)
 119       return true;
 120   return false;
 121 }
 122
 123 /* Returns the proper token type, either T_ID or a reserved
 124    keyword enum, for ID. */
 125 int
 126 lex_id_to_token (struct substring id)
 127 {
 128   if (ss_length (id) >= 2 && ss_length (id) <= 4)
 129     {
 130       const struct keyword *kw;
 131       for (kw = keywords; kw < &keywords[keyword_cnt]; kw++)
 132         if (ss_equals_case (kw->identifier, id))
 133           return kw->token;
 134     }
 135
 136   return T_ID;
 137 }
 138
 139 /* Returns the name for the given keyword token type. */
 140 const char *
 141 lex_id_name (int token)
 142 {
 143   const struct keyword *kw;
 144
 145   for (kw = keywords; kw < &keywords[keyword_cnt]; kw++)
 146     if (kw->token == token)
 147       {
 148         /* A "struct substring" is not guaranteed to be
 149            null-terminated, as our caller expects, but in this
 150            case it always will be. */
 151         return ss_data (kw->identifier);
 152       }
 153   NOT_REACHED ();
 154 }