S_DO_REPEAT_1,
S_DO_REPEAT_2,
S_DO_REPEAT_3,
+ S_DEFINE_1,
+ S_DEFINE_2,
+ S_DEFINE_3,
+ S_DEFINE_4,
S_BEGIN_DATA_1,
S_BEGIN_DATA_2,
S_BEGIN_DATA_3,
return is_end_of_line (input, n, eof, ofs);
}
+/* Returns true if every character decoded by u8_mbtouc() from the N bytes
+   starting at INPUT_ satisfies lex_uc_is_space(), false as soon as one does
+   not.  An empty buffer (N == 0) is trivially all spaces. */
+static bool
+is_all_spaces (const char *input_, size_t n)
+{
+  const uint8_t *input = CHAR_CAST (const uint8_t *, input_);
+
+  /* OFS is a size_t so that it compares against N without a signed/unsigned
+     conversion and cannot overflow on very long inputs. */
+  for (size_t ofs = 0; ofs < n; )
+    {
+      ucs4_t uc;
+      int mblen = u8_mbtouc (&uc, input + ofs, n - ofs);
+      if (!lex_uc_is_space (uc))
+        return false;
+
+      /* Advance past the bytes consumed by the character just decoded. */
+      ofs += mblen;
+    }
+  return true;
+}
+
static int
segmenter_parse_newline__ (const char *input, size_t n, bool eof,
enum segment_type *type)
}
/* fall through */
- case SEG_MACRO_ID:
case SEG_NUMBER:
case SEG_QUOTED_STRING:
case SEG_HEX_STRING:
case SEG_COMMENT_COMMAND:
case SEG_DO_REPEAT_COMMAND:
case SEG_INLINE_DATA:
+ case SEG_MACRO_ID:
+ case SEG_MACRO_BODY:
case SEG_START_DOCUMENT:
case SEG_DOCUMENT:
case SEG_START_COMMAND:
case SEG_END:
case SEG_EXPECTED_QUOTE:
case SEG_EXPECTED_EXPONENT:
- case SEG_UNEXPECTED_DOT:
case SEG_UNEXPECTED_CHAR:
id[0] = '\0';
return ofs + retval;
return ofs;
}
}
+ else if (lex_id_match_n (ss_cstr ("DEFINE"), word, 6))
+ {
+ s->state = S_DEFINE_1;
+ return ofs;
+ }
else if (lex_id_match (ss_cstr ("FILE"), word))
{
char id[16];
s->substate = SS_START_OF_COMMAND;
}
else
- *type = SEG_UNEXPECTED_DOT;
+ *type = SEG_PUNCT;
return 1;
case '0': case '1': case '2': case '3': case '4':
}
else if (lex_uc_is_id1 (uc))
return segmenter_parse_id__ (s, input, n, eof, type);
+ else if (uc > 32 && uc < 127 && uc != '\\' && uc != '^')
+ {
+ *type = SEG_PUNCT;
+ s->substate = 0;
+ return 1;
+ }
else
{
*type = SEG_UNEXPECTED_CHAR;
}
}
+/* Segmenting a DEFINE command.  Such a command is made up of:
+
+   - The DEFINE keyword.
+
+   - Anything but "(".
+
+   - "(", then a sequence of tokens that may include balanced parentheses,
+     up to the matching final ")".
+
+   - Any number of lines, one string per line, terminated by "!ENDDEFINE".
+     Usually the first line is blank (a newline directly follows the "(")
+     and the last line holds only "!ENDDEFINE.", although that line may start
+     with other tokens.  The entire DEFINE...!ENDDEFINE may even sit on a
+     single line.
+
+   This function handles state S_DEFINE_1: tokens are passed through
+   segmenter_subparse() until the first "(" is seen, which moves the
+   segmenter to S_DEFINE_2 with a nesting depth of 1.  Returns the value
+   obtained from segmenter_subparse(), or -1 if that value is negative. */
+static int
+segmenter_parse_define_1__ (struct segmenter *s,
+                            const char *input, size_t n, bool eof,
+                            enum segment_type *type)
+{
+  int ofs = segmenter_subparse (s, input, n, eof, type);
+  if (ofs < 0)
+    return -1;
+
+  const bool command_ended = (*type == SEG_SEPARATE_COMMANDS
+                              || *type == SEG_END_COMMAND
+                              || *type == SEG_START_COMMAND);
+  if (command_ended)
+    {
+      /* Malformed DEFINE: the command finished before any "(" token
+         appeared.  Fall back to general parsing. */
+      s->state = S_GENERAL;
+    }
+  else if (*type == SEG_PUNCT && input[0] == '(')
+    {
+      /* Opening parenthesis: start tracking nesting. */
+      s->state = S_DEFINE_2;
+      s->nest = 1;
+    }
+
+  return ofs;
+}
+
+/* State S_DEFINE_2: we are inside the parenthesized part of a DEFINE
+   command.  Tokens are passed through segmenter_subparse() while S->nest
+   tracks the parenthesis depth; when the depth drops to zero the segmenter
+   moves on to S_DEFINE_3.  Returns the value obtained from
+   segmenter_subparse(), or -1 if that value is negative. */
+static int
+segmenter_parse_define_2__ (struct segmenter *s,
+                            const char *input, size_t n, bool eof,
+                            enum segment_type *type)
+{
+  int ofs = segmenter_subparse (s, input, n, eof, type);
+  if (ofs < 0)
+    return -1;
+
+  const bool command_ended = (*type == SEG_SEPARATE_COMMANDS
+                              || *type == SEG_END_COMMAND
+                              || *type == SEG_START_COMMAND);
+  if (command_ended)
+    {
+      /* Malformed DEFINE: the command finished while parentheses were still
+         open.  Fall back to general parsing. */
+      s->state = S_GENERAL;
+    }
+  else if (*type == SEG_PUNCT)
+    {
+      if (input[0] == '(')
+        s->nest++;
+      else if (input[0] == ')' && --s->nest == 0)
+        {
+          /* Outermost parenthesis closed. */
+          s->state = S_DEFINE_3;
+          s->substate = 0;
+        }
+    }
+
+  return ofs;
+}
+
+/* Searches INPUT for the first position at which "!ENDDEFINE" occurs, as
+   judged by ss_equals_case().  Returns that byte offset, or SIZE_MAX if there
+   is no such position. */
+static size_t
+find_enddefine (struct substring input)
+{
+  const struct substring keyword = ss_cstr ("!ENDDEFINE");
+
+  /* Too short to hold the keyword at all. */
+  if (input.length < keyword.length)
+    return SIZE_MAX;
+
+  const size_t last_start = input.length - keyword.length;
+  for (size_t pos = 0; pos <= last_start; pos++)
+    {
+      /* Cheap first-byte filter before the full comparison. */
+      if (input.string[pos] != '!')
+        continue;
+
+      if (ss_equals_case (ss_substr (input, pos, keyword.length), keyword))
+        return pos;
+    }
+  return SIZE_MAX;
+}
+
+/* State S_DEFINE_3: we are in the body of a macro definition and expect
+   either another body line or !ENDDEFINE.
+
+   Returns -1 when a complete line is not yet available (no newline in the N
+   bytes at INPUT and EOF is false).  Otherwise stores a segment type in
+   *TYPE and returns the segment's length in bytes. */
+static int
+segmenter_parse_define_3__ (struct segmenter *s,
+                            const char *input, size_t n, bool eof,
+                            enum segment_type *type)
+{
+  /* Measure one whole line, not counting its "\n" or "\r\n" terminator. */
+  int line_len;
+  const char *nl = memchr (input, '\n', n);
+  if (nl != NULL)
+    {
+      line_len = nl - input;
+      if (nl > input && nl[-1] == '\r')
+        line_len--;
+    }
+  else if (eof)
+    line_len = n;
+  else
+    return -1;
+
+  if (line_len < 0)
+    return -1;
+
+  size_t end = find_enddefine (ss_buffer (input, line_len));
+  if (end == SIZE_MAX)
+    {
+      /* The line has no !ENDDEFINE, so all of it is macro body.
+
+         A blank line (completely empty, or only spaces and comments) is
+         still reported, since blank lines can be significant.
+
+         The exception is the first line of the body, the one that shares a
+         line with the closing parenthesis of the argument definition: if
+         that is blank it is not significant, so it is reported as plain
+         spaces. */
+      if (s->substate == 0 && is_all_spaces (input, line_len))
+        *type = SEG_SPACES;
+      else
+        *type = SEG_MACRO_BODY;
+      s->state = S_DEFINE_4;
+      s->substate = 1;
+      return line_len;
+    }
+
+  /* The macro finishes at the !ENDDEFINE found on this line. */
+  s->state = S_GENERAL;
+  s->substate = 0;
+
+  if (end == 0)
+    {
+      /* !ENDDEFINE is the first thing on the line: hand over to general
+         parsing right away. */
+      return segmenter_push (s, input, n, eof, type);
+    }
+
+  /* Something precedes !ENDDEFINE: report it as spaces if that is all it is,
+     and as macro body otherwise. */
+  if (is_all_spaces (input, end))
+    *type = SEG_SPACES;
+  else
+    *type = SEG_MACRO_BODY;
+  return end;
+}
+
+/* State S_DEFINE_4: a macro body line has just been reported, so parse the
+   newline after it with segmenter_parse_newline__() and go back to
+   S_DEFINE_3 for the next line.  Returns the newline parser's result, or -1
+   (leaving the state untouched) if that result is negative. */
+static int
+segmenter_parse_define_4__ (struct segmenter *s,
+                            const char *input, size_t n, bool eof,
+                            enum segment_type *type)
+{
+  int ofs = segmenter_parse_newline__ (input, n, eof, type);
+  if (ofs >= 0)
+    {
+      s->state = S_DEFINE_3;
+      return ofs;
+    }
+
+  return -1;
+}
+
static int
segmenter_parse_begin_data_1__ (struct segmenter *s,
const char *input, size_t n, bool eof,
case S_DO_REPEAT_3:
return segmenter_parse_do_repeat_3__ (s, input, n, eof, type);
+ case S_DEFINE_1:
+ return segmenter_parse_define_1__ (s, input, n, eof, type);
+ case S_DEFINE_2:
+ return segmenter_parse_define_2__ (s, input, n, eof, type);
+ case S_DEFINE_3:
+ return segmenter_parse_define_3__ (s, input, n, eof, type);
+ case S_DEFINE_4:
+ return segmenter_parse_define_4__ (s, input, n, eof, type);
+
case S_BEGIN_DATA_1:
return segmenter_parse_begin_data_1__ (s, input, n, eof, type);
case S_BEGIN_DATA_2:
case S_DO_REPEAT_3:
return PROMPT_DO_REPEAT;
+ case S_DEFINE_1:
+ case S_DEFINE_2:
+ return s->substate & SS_START_OF_COMMAND ? PROMPT_FIRST : PROMPT_LATER;
+ case S_DEFINE_3:
+ case S_DEFINE_4:
+ return PROMPT_DEFINE;
+
case S_BEGIN_DATA_1:
return PROMPT_FIRST;
case S_BEGIN_DATA_2: