{
if (input[1] == '!')
{
- int ofs;
-
- for (ofs = 2; ofs < n; ofs++)
- if (input[ofs] == '\n')
- {
- if (input[ofs] == '\n' && input[ofs - 1] == '\r')
- ofs--;
-
- s->state = S_GENERAL;
- s->substate = SS_START_OF_COMMAND;
- *type = SEG_SHBANG;
- return ofs;
- }
+ for (int ofs = 2; ; ofs++)
+ {
+ if (ofs >= n)
+ {
+ if (!eof)
+ return -1;
+ }
+ else if (input[ofs] == '\n')
+ {
+ if (input[ofs - 1] == '\r')
+ ofs--;
+ }
+ else
+ continue;
- return eof ? ofs : -1;
+ s->state = S_GENERAL;
+ s->substate = SS_START_OF_COMMAND;
+ *type = SEG_SHBANG;
+ return ofs;
+ }
}
}
else if (!eof)
if (!eof)
return -1;
goto number;
- };
+ }
if (input[ofs] == '.')
{
+ if (ofs + 1 >= n)
+ {
+ if (!eof)
+ return -1;
+ goto number;
+ }
+
ofs = skip_digits (input, n, eof, ofs + 1);
if (ofs < 0)
return -1;
+ else if (ofs >= n)
+ goto number;
}
- if (ofs >= n)
- {
- if (!eof)
- return -1;
- goto number;
- }
if (input[ofs] == 'e' || input[ofs] == 'E')
{
ofs++;
}
/* fall through */
+ case SEG_MACRO_ID:
case SEG_NUMBER:
case SEG_QUOTED_STRING:
case SEG_HEX_STRING:
ofs--;
}
- if (is_reserved_word (input, ofs))
- *type = SEG_RESERVED_WORD;
- else
- *type = SEG_IDENTIFIER;
+ *type = (is_reserved_word (input, ofs) ? SEG_RESERVED_WORD
+ : input[0] == '!' ? SEG_MACRO_ID
+ : SEG_IDENTIFIER);
if (s->substate & SS_START_OF_COMMAND)
{
return segmenter_parse_string__ (SEG_QUOTED_STRING, 0,
s, input, n, eof, type);
+ case '!':
+ return segmenter_parse_id__ (s, input, n, eof, type);
+
default:
if (lex_uc_is_space (uc))
{
}
else if (lex_uc_is_id1 (uc))
return segmenter_parse_id__ (s, input, n, eof, type);
+ else if (uc > 32 && uc < 127 && uc != '\\' && uc != '^')
+ {
+ *type = SEG_PUNCT;
+ s->substate = 0;
+ return 1;
+ }
else
{
*type = SEG_UNEXPECTED_CHAR;
return ofs;
}
+/* We are segmenting a DO REPEAT command, currently reading the syntax that
+ defines the stand-in variables (the head) before the lines of syntax to be
+ repeated (the body). */
static int
segmenter_parse_do_repeat_1__ (struct segmenter *s,
const char *input, size_t n, bool eof,
if (ofs < 0)
return -1;
- if (*type == SEG_START_COMMAND || *type == SEG_SEPARATE_COMMANDS)
- s->state = S_DO_REPEAT_2;
- else if (*type == SEG_END_COMMAND)
+ if (*type == SEG_SEPARATE_COMMANDS)
{
+ /* We reached a blank line that separates the head from the body. */
+ s->state = S_DO_REPEAT_2;
+ }
+ else if (*type == SEG_END_COMMAND || *type == SEG_START_COMMAND)
+ {
+ /* We reached the body. */
s->state = S_DO_REPEAT_3;
s->substate = 1;
}
return ofs;
}
+/* We are segmenting a DO REPEAT command, currently reading a blank line that
+ separates the head from the body. */
static int
segmenter_parse_do_repeat_2__ (struct segmenter *s,
const char *input, size_t n, bool eof,
if (*type == SEG_NEWLINE)
{
+ /* We reached the body. */
s->state = S_DO_REPEAT_3;
s->substate = 1;
}
return ofs - (input[ofs - 1] == '\r');
}
+/* We are in the body of DO REPEAT, segmenting the lines of syntax that are to
+ be repeated. Report each line of syntax as a single SEG_DO_REPEAT_COMMAND.
+
+ DO REPEAT can be nested, so we look for DO REPEAT...END REPEAT blocks inside
+ the lines we're segmenting. s->substate counts the nesting level, starting
+ at 1. */
static int
segmenter_parse_do_repeat_3__ (struct segmenter *s,
const char *input, size_t n, bool eof,
return -1;
else if (s->substate == 0)
{
+ /* Nesting level dropped to 0, so we've finished reading the DO REPEAT
+ body. */
s->state = S_GENERAL;
s->substate = SS_START_OF_COMMAND | SS_START_OF_LINE;
return segmenter_push (s, input, n, eof, type);