src/data/identifier.c

   1 /* PSPP - computes sample statistics.
   2    Copyright (C) 1997-9, 2000, 2005 Free Software Foundation, Inc.
   3    Written by John Darrington <john@darrington.wattle.id.au>
   4
   5    This program is free software; you can redistribute it and/or
   6    modify it under the terms of the GNU General Public License as
   7    published by the Free Software Foundation; either version 2 of the
   8    License, or (at your option) any later version.
   9
  10    This program is distributed in the hope that it will be useful, but
  11    WITHOUT ANY WARRANTY; without even the implied warranty of
  12    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  13    General Public License for more details.
  14
  15    You should have received a copy of the GNU General Public License
  16    along with this program; if not, write to the Free Software
  17    Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
  18    02110-1301, USA. */
  19
  20 /*
   This file is concerned with the definition of the PSPP syntax, NOT the
   action of scanning/parsing code.
  23 */
  24
  25 #include <config.h>
  26 #include "identifier.h"
  27
  28
  29 #include <assert.h>
  30 #include <string.h>
  31
  32
  33 /* Table of keywords. */
  34 const char *keywords[T_N_KEYWORDS + 1] =
  35   {
  36     "AND", "OR", "NOT",
  37     "EQ", "GE", "GT", "LE", "LT", "NE",
  38     "ALL", "BY", "TO", "WITH",
  39     NULL,
  40   };
  41
  42 /* Recognizing identifiers. */
  43
  44 /* Returns true if C may be the first character in an
  45    identifier. */
  46 bool
  47 lex_is_id1 (char c_)
  48 {
  49   unsigned char c = c_;
  50   return isalpha (c) || c == '@' || c == '#' || c == '$';
  51 }
  52
  53 /* Returns true if C may be a character in an identifier other
  54    than the first. */
  55 bool
  56 lex_is_idn (char c_)
  57 {
  58   unsigned char c = c_;
  59   return lex_is_id1 (c) || isdigit (c) || c == '.' || c == '_';
  60 }
  61
  62 /* If string S begins with an identifier, returns the first
  63    character following it.  Otherwise, returns S unchanged. */
  64 char *
  65 lex_skip_identifier (const char *s)
  66 {
  67   if (lex_is_id1 (*s))
  68     {
  69       s++;
  70       while (lex_is_idn (*s))
  71         s++;
  72     }
  73   return (char *) s;
  74 }
  75 \f
  76 /* Comparing identifiers. */
  77
  78 /* Keywords match if one of the following is true: KW and TOK are
  79    identical (except for differences in case), or TOK is at least 3
  80    characters long and those characters are identical to KW.  KW_LEN
  81    is the length of KW, TOK_LEN is the length of TOK. */
  82 bool
  83 lex_id_match_len (const char *kw, size_t kw_len,
  84                   const char *tok, size_t tok_len)
  85 {
  86   size_t i = 0;
  87
  88   assert (kw && tok);
  89   for (;;)
  90     {
  91       if (i == kw_len && i == tok_len)
  92         return true;
  93       else if (i == tok_len)
  94         return i >= 3;
  95       else if (i == kw_len)
  96         return false;
  97       else if (toupper ((unsigned char) kw[i])
  98                != toupper ((unsigned char) tok[i]))
  99         return false;
 100
 101       i++;
 102     }
 103 }
 104
 105 /* Same as lex_id_match_len() minus the need to pass in the lengths. */
 106 bool
 107 lex_id_match (const char *kw, const char *tok)
 108 {
 109   return lex_id_match_len (kw, strlen (kw), tok, strlen (tok));
 110 }
 111
 112
 113
 114 /* Returns the proper token type, either T_ID or a reserved keyword
 115    enum, for ID[], which must contain LEN characters. */
 116 int
 117 lex_id_to_token (const char *id, size_t len)
 118 {
 119   const char **kwp;
 120
 121   if (len < 2 || len > 4)
 122     return T_ID;
 123
 124   for (kwp = keywords; *kwp; kwp++)
 125     if (!strcasecmp (*kwp, id))
 126       return T_FIRST_KEYWORD + (kwp - keywords);
 127
 128   return T_ID;
 129 }
 130 \f