S_DEFINE_2,
S_DEFINE_3,
S_DEFINE_4,
+ S_DEFINE_5,
S_BEGIN_DATA_1,
S_BEGIN_DATA_2,
S_BEGIN_DATA_3,
static int
segmenter_parse_number__ (struct segmenter *s, const char *input, size_t n,
- bool eof, enum segment_type *type)
+ bool eof, enum segment_type *type, int ofs)
{
- int ofs;
-
assert (s->state == S_GENERAL);
- ofs = skip_digits (input, n, eof, 0);
+ ofs = skip_digits (input, n, eof, ofs);
if (ofs < 0)
return -1;
case SEG_DO_REPEAT_COMMAND:
case SEG_INLINE_DATA:
case SEG_MACRO_ID:
+ case SEG_MACRO_NAME:
case SEG_MACRO_BODY:
case SEG_START_DOCUMENT:
case SEG_DOCUMENT:
*type = SEG_PUNCT;
return 1;
- case '(': case ')': case ',': case '=': case '-':
- case '[': case ']': case '&': case '|': case '+':
+ case '-':
+ ofs = skip_spaces (input, n, eof, 1);
+ if (ofs < 0)
+ return -1;
+ else if (ofs < n && c_isdigit (input[ofs]))
+ return segmenter_parse_number__ (s, input, n, eof, type, ofs);
+ else if (ofs < n && input[ofs] == '.')
+ {
+ if (ofs + 1 >= n)
+ {
+ if (!eof)
+ return -1;
+ }
+ else if (c_isdigit (input[ofs + 1]))
+ return segmenter_parse_number__ (s, input, n, eof, type, ofs);
+ }
+ /* Fall through. */
+ case '(': case ')': case '{': case ',': case '=': case ';': case ':':
+ case '[': case ']': case '}': case '&': case '|': case '+':
*type = SEG_PUNCT;
s->substate = 0;
return 1;
return -1;
}
else if (c_isdigit (input[1]))
- return segmenter_parse_number__ (s, input, n, eof, type);
+ return segmenter_parse_number__ (s, input, n, eof, type, 0);
int eol = at_end_of_line (input, n, eof, 1);
if (eol < 0)
case '0': case '1': case '2': case '3': case '4':
case '5': case '6': case '7': case '8': case '9':
- return segmenter_parse_number__ (s, input, n, eof, type);
+ return segmenter_parse_number__ (s, input, n, eof, type, 0);
case 'u': case 'U':
return segmenter_maybe_parse_string__ (SEG_UNICODE_STRING,
s, input, n, eof, type);
case '!':
- return segmenter_parse_id__ (s, input, n, eof, type);
+ if (n < 2)
+ {
+ if (!eof)
+ return -1;
+ *type = SEG_PUNCT;
+ return 1;
+ }
+ else if (input[1] == '*')
+ {
+ *type = SEG_MACRO_ID;
+ return 2;
+ }
+ else
+ return segmenter_parse_id__ (s, input, n, eof, type);
default:
if (lex_uc_is_space (uc))
if (ofs < 0)
return -1;
- if (input[ofs - 1] == '\r' && input[ofs] == '\n')
+ if (ofs < n && input[ofs - 1] == '\r' && input[ofs] == '\n')
{
if (ofs == 1)
{
- The DEFINE keyword.
+ - An identifier. We transform this into SEG_MACRO_NAME instead of
+ SEG_IDENTIFIER or SEG_MACRO_ID because this identifier must never be
+ macro-expanded.
+
- Anything but "(".
- "(" followed by a sequence of tokens possibly including balanced parentheses
line, even.
*/
static int
-segmenter_parse_define_1__ (struct segmenter *s,
- const char *input, size_t n, bool eof,
- enum segment_type *type)
+segmenter_parse_define_1_2__ (struct segmenter *s,
+ const char *input, size_t n, bool eof,
+ enum segment_type *type)
{
int ofs = segmenter_subparse (s, input, n, eof, type);
if (ofs < 0)
return -1;
- if (*type == SEG_SEPARATE_COMMANDS
+ if (s->state == S_DEFINE_1
+ && (*type == SEG_IDENTIFIER || *type == SEG_MACRO_ID))
+ {
+ *type = SEG_MACRO_NAME;
+ s->state = S_DEFINE_2;
+ }
+ else if (*type == SEG_SEPARATE_COMMANDS
|| *type == SEG_END_COMMAND
|| *type == SEG_START_COMMAND)
{
}
else if (*type == SEG_PUNCT && input[0] == '(')
{
- s->state = S_DEFINE_2;
+ s->state = S_DEFINE_3;
s->nest = 1;
return ofs;
}
}
static int
-segmenter_parse_define_2__ (struct segmenter *s,
+segmenter_parse_define_3__ (struct segmenter *s,
const char *input, size_t n, bool eof,
enum segment_type *type)
{
s->nest--;
if (!s->nest)
{
- s->state = S_DEFINE_3;
+ s->state = S_DEFINE_4;
s->substate = 0;
}
return ofs;
/* We are in the body of a macro definition, looking for additional lines of
the body or !ENDDEFINE. */
static int
-segmenter_parse_define_3__ (struct segmenter *s,
+segmenter_parse_define_4__ (struct segmenter *s,
const char *input, size_t n, bool eof,
enum segment_type *type)
{
report it as spaces because it's not significant. */
*type = (s->substate == 0 && is_all_spaces (input, ofs)
? SEG_SPACES : SEG_MACRO_BODY);
- s->state = S_DEFINE_4;
+ s->state = S_DEFINE_5;
s->substate = 1;
return ofs;
}
}
static int
-segmenter_parse_define_4__ (struct segmenter *s,
+segmenter_parse_define_5__ (struct segmenter *s,
const char *input, size_t n, bool eof,
enum segment_type *type)
{
if (ofs < 0)
return -1;
- s->state = S_DEFINE_3;
+ s->state = S_DEFINE_4;
return ofs;
}
bytes as part of INPUT, because they have (figuratively) been consumed by
the segmenter.
+ Segments can have zero length, including segment types SEG_END,
+ SEG_SEPARATE_COMMANDS, SEG_START_DOCUMENT, SEG_INLINE_DATA, and SEG_SPACES.
+
Failure occurs only if the segment type of the N bytes in INPUT cannot yet
be determined. In this case segmenter_push() returns -1. If more input is
available, the caller should obtain some more, then call again with a larger
return segmenter_parse_do_repeat_3__ (s, input, n, eof, type);
case S_DEFINE_1:
- return segmenter_parse_define_1__ (s, input, n, eof, type);
case S_DEFINE_2:
- return segmenter_parse_define_2__ (s, input, n, eof, type);
+ return segmenter_parse_define_1_2__ (s, input, n, eof, type);
case S_DEFINE_3:
return segmenter_parse_define_3__ (s, input, n, eof, type);
case S_DEFINE_4:
return segmenter_parse_define_4__ (s, input, n, eof, type);
+ case S_DEFINE_5:
+ return segmenter_parse_define_5__ (s, input, n, eof, type);
case S_BEGIN_DATA_1:
return segmenter_parse_begin_data_1__ (s, input, n, eof, type);
case S_DEFINE_1:
case S_DEFINE_2:
- return s->substate & SS_START_OF_COMMAND ? PROMPT_FIRST : PROMPT_LATER;
case S_DEFINE_3:
+ return s->substate & SS_START_OF_COMMAND ? PROMPT_FIRST : PROMPT_LATER;
case S_DEFINE_4:
+ case S_DEFINE_5:
return PROMPT_DEFINE;
case S_BEGIN_DATA_1: