src/data/identifier.c

   1 /* PSPP - computes sample statistics.
   2    Copyright (C) 1997-9, 2000, 2005 Free Software Foundation, Inc.
   3    Written by John Darrington <john@darrington.wattle.id.au>
   4
   5    This program is free software; you can redistribute it and/or
   6    modify it under the terms of the GNU General Public License as
   7    published by the Free Software Foundation; either version 2 of the
   8    License, or (at your option) any later version.
   9
  10    This program is distributed in the hope that it will be useful, but
  11    WITHOUT ANY WARRANTY; without even the implied warranty of
  12    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  13    General Public License for more details.
  14
  15    You should have received a copy of the GNU General Public License
  16    along with this program; if not, write to the Free Software
  17    Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
  18    02110-1301, USA. */
  19
  20 /*
   This file is concerned with the definition of the PSPP syntax, NOT
   with the action of scanning or parsing code.
  23 */
  24
  25 #include <config.h>
  26 #include "identifier.h"
  27
  28
  29 #include <assert.h>
  30 #include <string.h>
  31
  32
  33 /* Table of keywords. */
  34 const char *keywords[T_N_KEYWORDS + 1] =
  35   {
  36     "AND", "OR", "NOT",
  37     "EQ", "GE", "GT", "LE", "LT", "NE",
  38     "ALL", "BY", "TO", "WITH",
  39     NULL,
  40   };
  41
  42 /* Recognizing identifiers. */
  43
  44 /* Returns true if C may be the first character in an
  45    identifier in the current locale. */
  46 bool
  47 lex_is_id1 (char c_)
  48 {
  49   unsigned char c = c_;
  50   return isalpha (c) || c == '@' || c == '#' || c == '$';
  51 }
  52
  53
  54 /* Returns true if C may be a character in an identifier other
  55    than the first. */
  56 bool
  57 lex_is_idn (char c_)
  58 {
  59   unsigned char c = c_;
  60   return lex_is_id1 (c) || isdigit (c) || c == '.' || c == '_';
  61 }
  62
  63 /* If string S begins with an identifier, returns the first
  64    character following it.  Otherwise, returns S unchanged. */
  65 char *
  66 lex_skip_identifier (const char *s)
  67 {
  68   if (lex_is_id1 (*s))
  69     {
  70       s++;
  71       while (lex_is_idn (*s))
  72         s++;
  73     }
  74   return (char *) s;
  75 }
  76 \f
  77 /* Comparing identifiers. */
  78
  79 /* Keywords match if one of the following is true: KW and TOK are
  80    identical (except for differences in case), or TOK is at least 3
  81    characters long and those characters are identical to KW.  KW_LEN
  82    is the length of KW, TOK_LEN is the length of TOK. */
  83 bool
  84 lex_id_match_len (const char *kw, size_t kw_len,
  85                   const char *tok, size_t tok_len)
  86 {
  87   size_t i = 0;
  88
  89   assert (kw && tok);
  90   for (;;)
  91     {
  92       if (i == kw_len && i == tok_len)
  93         return true;
  94       else if (i == tok_len)
  95         return i >= 3;
  96       else if (i == kw_len)
  97         return false;
  98       else if (toupper ((unsigned char) kw[i])
  99                != toupper ((unsigned char) tok[i]))
 100         return false;
 101
 102       i++;
 103     }
 104 }
 105
 106 /* Same as lex_id_match_len() minus the need to pass in the lengths. */
 107 bool
 108 lex_id_match (const char *kw, const char *tok)
 109 {
 110   return lex_id_match_len (kw, strlen (kw), tok, strlen (tok));
 111 }
 112
 113
 114
 115 /* Returns the proper token type, either T_ID or a reserved keyword
 116    enum, for ID[], which must contain LEN characters. */
 117 int
 118 lex_id_to_token (const char *id, size_t len)
 119 {
 120   const char **kwp;
 121
 122   if (len < 2 || len > 4)
 123     return T_ID;
 124
 125   for (kwp = keywords; *kwp; kwp++)
 126     if (!strcasecmp (*kwp, id))
 127       return T_FIRST_KEYWORD + (kwp - keywords);
 128
 129   return T_ID;
 130 }
 131 \f