pintos-os.org Git - pspp/blob - src/data/identifier.h

   1 /* PSPP - a program for statistical analysis.
   2    Copyright (C) 1997-9, 2000, 2010, 2011 Free Software Foundation, Inc.
   3
   4    This program is free software: you can redistribute it and/or modify
   5    it under the terms of the GNU General Public License as published by
   6    the Free Software Foundation, either version 3 of the License, or
   7    (at your option) any later version.
   8
   9    This program is distributed in the hope that it will be useful,
  10    but WITHOUT ANY WARRANTY; without even the implied warranty of
  11    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  12    GNU General Public License for more details.
  13
  14    You should have received a copy of the GNU General Public License
  15    along with this program.  If not, see <http://www.gnu.org/licenses/>. */
  16
  17 #ifndef DATA_IDENTIFIER_H
  18 #define DATA_IDENTIFIER_H 1
  19
  20 #include <ctype.h>
  21 #include <stdbool.h>
  22 #include <unitypes.h>
  23 #include "libpspp/str.h"
  24 #include "gl/verify.h"
  25
   26 #define TOKEN_TYPES /* X-macro list; expand via TOKEN_TYPE(TYPE). */    \
   27     TOKEN_TYPE(STOP)                /* End of input. */                 \
   28                                                                         \
   29     TOKEN_TYPE(ID)                  /* Identifier. */                   \
   30     TOKEN_TYPE(POS_NUM)             /* Positive number. */              \
   31     TOKEN_TYPE(NEG_NUM)             /* Negative number. */              \
   32     TOKEN_TYPE(STRING)              /* Quoted string. */                \
   33                                                                         \
   34     TOKEN_TYPE(ENDCMD)              /* . */                             \
   35     TOKEN_TYPE(PLUS)                /* + */                             \
   36     TOKEN_TYPE(DASH)                /* - */                             \
   37     TOKEN_TYPE(ASTERISK)            /* * */                             \
   38     TOKEN_TYPE(SLASH)               /* / */                             \
   39     TOKEN_TYPE(EQUALS)              /* = */                             \
   40     TOKEN_TYPE(LPAREN)              /* ( */                             \
   41     TOKEN_TYPE(RPAREN)              /* ) */                             \
   42     TOKEN_TYPE(LBRACK)              /* [ */                             \
   43     TOKEN_TYPE(RBRACK)              /* ] */                             \
   44     TOKEN_TYPE(LCURLY)              /* { */                             \
   45     TOKEN_TYPE(RCURLY)              /* } */                             \
   46     TOKEN_TYPE(COMMA)               /* , */                             \
   47     TOKEN_TYPE(SEMICOLON)           /* ; */                             \
   48     TOKEN_TYPE(COLON)               /* : */                             \
   49                                                                         \
   50     TOKEN_TYPE(AND)                 /* AND */                           \
   51     TOKEN_TYPE(OR)                  /* OR */                            \
   52     TOKEN_TYPE(NOT)                 /* NOT */                           \
   53                                                                         \
   54     TOKEN_TYPE(EQ)                  /* EQ */                            \
   55     TOKEN_TYPE(GE)                  /* GE or >= */                      \
   56     TOKEN_TYPE(GT)                  /* GT or > */                       \
   57     TOKEN_TYPE(LE)                  /* LE or <= */                      \
   58     TOKEN_TYPE(LT)                  /* LT or < */                       \
   59     TOKEN_TYPE(NE)                  /* NE or ~= */                      \
   60                                                                         \
   61     TOKEN_TYPE(ALL)                 /* ALL */                           \
   62     TOKEN_TYPE(BY)                  /* BY */                            \
   63     TOKEN_TYPE(TO)                  /* TO */                            \
   64     TOKEN_TYPE(WITH)                /* WITH */                          \
   65                                                                         \
   66     TOKEN_TYPE(EXP)                 /* ** */                            \
   67                                                                         \
   68     TOKEN_TYPE(MACRO_ID)            /* Identifier starting with '!'. */ \
   69     TOKEN_TYPE(MACRO_PUNCT)         /* Miscellaneous punctuator. */
   70 /* Token types.  One T_<TYPE> enumerator is generated per TOKEN_TYPES entry,
         in list order; the verify() line below pins T_STOP, the first entry,
         to 0 at compile time. */
   71 enum token_type
   72   {
   73 #define TOKEN_TYPE(TYPE) T_##TYPE,
   74     TOKEN_TYPES
   75 #undef TOKEN_TYPE
   76   };
   77 verify(T_STOP == 0);
  78
   79 #define TOKEN_TYPE(TYPE) + 1
   80 enum { TOKEN_N_TYPES = TOKEN_TYPES };   /* Each entry expands to "+ 1", so this is the number of token types. */
   81 #undef TOKEN_TYPE
  82
   83 const char *token_type_to_name (enum token_type);     /* Maps a token type to a constant string.  NOTE(review): presumably its symbolic name -- confirm against the definition. */
   84 const char *token_type_to_string (enum token_type);   /* Maps a token type to a constant string.  NOTE(review): presumably its syntax spelling, which may not exist for every type -- confirm. */
  85
   86 /* Tokens.

         NOTE(review): lex_is_keyword() presumably reports whether the given
         token type is one of the reserved-word types; which types count as
         keywords is not visible in this header -- confirm against the
         definition. */
   87 bool lex_is_keyword (enum token_type);
  88
   89 /* Validating identifiers.

         Both functions take the candidate identifier ID and an ISSUE_ERROR
         flag and return a bool.  NOTE(review): ISSUE_ERROR presumably selects
         whether a diagnostic is reported when the check fails, and
         id_is_valid() presumably also checks ID against DICT_ENCODING and
         ID_MAX_LEN -- confirm against the definitions. */
   90 #define ID_MAX_LEN 64          /* Maximum length of identifier, in bytes. */
   91
   92 bool id_is_valid (const char *id, const char *dict_encoding, bool issue_error);
   93 bool id_is_plausible (const char *id, bool issue_error);
  94
   95 /* Recognizing identifiers.

         The lex_is_* predicates take a char; the lex_uc_* predicates take a
         ucs4_t (declared by <unitypes.h>).  NOTE(review): "id1" presumably
         tests whether a character may begin an identifier and "idn" whether
         it may appear after the first character; lex_id_get_length()
         presumably returns the length of the identifier at the start of the
         substring -- confirm against the definitions. */
   96 bool lex_is_id1 (char);
   97 bool lex_is_idn (char);
   98 bool lex_uc_is_id1 (ucs4_t);
   99 bool lex_uc_is_idn (ucs4_t);
  100 bool lex_uc_is_space (ucs4_t);
  101 size_t lex_id_get_length (struct substring);
 102
  103 /* Comparing identifiers.

         NOTE(review): lex_id_match() presumably tests whether TOKEN matches
         KEYWORD, with lex_id_match_n() using N as a minimum number of
         characters that must match; the exact matching rules are not visible
         here.  lex_id_to_token() returns int rather than enum token_type --
         presumably a T_* value; confirm against the definitions. */
  104 bool lex_id_match (struct substring keyword, struct substring token);
  105 bool lex_id_match_n (struct substring keyword, struct substring token,
  106                      size_t n);
  107 int lex_id_to_token (struct substring);
 108
 109 #endif /* !data/identifier.h */