pintos-os.org Git - pspp/blob - src/language/lexer/scan.h

   1 /* PSPP - a program for statistical analysis.
   2    Copyright (C) 2010, 2011, 2013 Free Software Foundation, Inc.
   3
   4    This program is free software: you can redistribute it and/or modify
   5    it under the terms of the GNU General Public License as published by
   6    the Free Software Foundation, either version 3 of the License, or
   7    (at your option) any later version.
   8
   9    This program is distributed in the hope that it will be useful,
  10    but WITHOUT ANY WARRANTY; without even the implied warranty of
  11    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  12    GNU General Public License for more details.
  13
  14    You should have received a copy of the GNU General Public License
  15    along with this program.  If not, see <http://www.gnu.org/licenses/>. */
  16
  17 #ifndef SCAN_H
  18 #define SCAN_H 1
  19
  20 #include "language/lexer/segment.h"
  21 #include "libpspp/str.h"
  22
  23 struct token;
  24
  25 /* PSPP syntax scanning.
  26
  27    PSPP divides traditional "lexical analysis" or "tokenization" into two
  28    phases: a lower-level phase called "segmentation" and a higher-level phase
  29    called "scanning".  segment.h provides declarations for the segmentation
  30    phase.  This header file contains declarations for the scanning phase.
  31
  32    Scanning accepts as input a stream of segments, which are UTF-8 strings each
  33    labeled with a segment type.  It outputs a stream of "scan tokens", which
  34    are the same as the tokens used by the PSPP parser with a few additional
  35    types.
  36 */
  37
  38 enum tokenize_result
  39   {
  40     TOKENIZE_EMPTY,
  41     TOKENIZE_TOKEN,
  42     TOKENIZE_ERROR
  43   };
  44
  45 enum tokenize_result token_from_segment (enum segment_type, struct substring,
  46                                          struct token *);
  47
  48 struct merger
  49   {
  50     unsigned int state;
  51   };
  52 #define MERGER_INIT { 0 }
  53
  54 int merger_add (struct merger *m, const struct token *in, struct token *out);
  55 \f
  56 /* A simplified lexer for handling syntax in a string. */
  57
  58 struct string_lexer
  59   {
  60     const char *input;
  61     size_t length;
  62     size_t offset;
  63     struct segmenter segmenter;
  64   };
  65
  66 enum string_lexer_result
  67   {
  68     SLR_END,
  69     SLR_TOKEN,
  70     SLR_ERROR
  71   };
  72
  73 void string_lexer_init (struct string_lexer *, const char *input,
  74                         size_t length, enum segmenter_mode, bool is_snippet);
  75 enum string_lexer_result string_lexer_next (struct string_lexer *,
  76                                             struct token *);
  77
  78 #endif /* scan.h */