+/* We are segmenting a DEFINE command, which consists of:
+
+ - The DEFINE keyword.
+
+ - An identifier. We transform this into SEG_MACRO_NAME instead of
+ SEG_IDENTIFIER or SEG_MACRO_ID because this identifier must never be
+ macro-expanded.
+
+ - Anything but "(".
+
+ - "(" followed by a sequence of tokens possibly including balanced parentheses
+ up to a final ")".
+
+ - A sequence of any number of lines, one string per line, ending with
+ "!ENDDEFINE". The first line is usually blank (that is, a newline follows
+ the "("). The last line usually just has "!ENDDEFINE." on it, but it can
+ start with other tokens. The whole DEFINE...!ENDDEFINE can be on a single
+ line, even.
+ */
+static int
+segmenter_parse_define_1_2__ (struct segmenter *s,
+                              const char *input, size_t n, bool eof,
+                              enum segment_type *type)
+{
+  /* Handles the part of DEFINE that precedes the opening "(" (states
+     S_DEFINE_1 and S_DEFINE_2).  The next segment is obtained from
+     segmenter_subparse(); a negative result from it is reported to the
+     caller as -1, otherwise the value it returned is passed through. */
+  const int len = segmenter_subparse (s, input, n, eof, type);
+  if (len < 0)
+    return -1;
+
+  const enum segment_type seg = *type;
+  const bool names_macro = (s->state == S_DEFINE_1
+                            && (seg == SEG_IDENTIFIER
+                                || seg == SEG_MACRO_ID));
+  const bool ends_command = (seg == SEG_SEPARATE_COMMANDS
+                             || seg == SEG_END_COMMAND
+                             || seg == SEG_START_COMMAND);
+
+  if (names_macro)
+    {
+      /* First identifier after DEFINE: report it as the macro's name and
+         move on to waiting for the "(". */
+      *type = SEG_MACRO_NAME;
+      s->state = S_DEFINE_2;
+    }
+  else if (ends_command)
+    {
+      /* Malformed DEFINE: the command ended before any "(" showed up, so
+         resume general parsing. */
+      s->state = S_GENERAL;
+    }
+  else if (seg == SEG_PUNCT && input[0] == '(')
+    {
+      /* Opening parenthesis: start tracking nesting depth at 1. */
+      s->state = S_DEFINE_3;
+      s->nest = 1;
+    }
+
+  /* Any other segment leaves the state untouched. */
+  return len;
+}
+
+static int
+segmenter_parse_define_3__ (struct segmenter *s,
+                            const char *input, size_t n, bool eof,
+                            enum segment_type *type)
+{
+  /* Handles the parenthesized part of DEFINE (state S_DEFINE_3), keeping
+     s->nest equal to the number of currently open parentheses.  Returns -1
+     if segmenter_subparse() reports a negative result, otherwise the value
+     it returned. */
+  const int len = segmenter_subparse (s, input, n, eof, type);
+  if (len < 0)
+    return -1;
+
+  const enum segment_type seg = *type;
+  if (seg == SEG_SEPARATE_COMMANDS
+      || seg == SEG_END_COMMAND
+      || seg == SEG_START_COMMAND)
+    {
+      /* Malformed DEFINE: the command ended while parentheses were still
+         open.  Go back to general parsing. */
+      s->state = S_GENERAL;
+      return len;
+    }
+
+  /* Only punctuation can change the nesting depth. */
+  if (seg != SEG_PUNCT)
+    return len;
+
+  switch (input[0])
+    {
+    case '(':
+      s->nest++;
+      break;
+
+    case ')':
+      if (--s->nest == 0)
+        {
+          /* Outermost parenthesis closed: the macro body comes next.
+             Substate 0 marks that no body line has been seen yet. */
+          s->state = S_DEFINE_4;
+          s->substate = 0;
+        }
+      break;
+
+    default:
+      break;
+    }
+
+  return len;
+}
+
+/* Scans INPUT for "!ENDDEFINE" (matched with ss_equals_case()) and returns
+   the offset of the first occurrence that is not inside a quoted string and
+   not inside whatever skip_spaces_and_comments() skips over.  Returns
+   SIZE_MAX if there is no such occurrence, including when a quoted string
+   is left unterminated. */
+static size_t
+find_enddefine (struct substring input)
+{
+  const struct substring keyword = ss_cstr ("!ENDDEFINE");
+  const size_t len = input.length;
+  int pos = 0;
+
+  for (;;)
+    {
+      /* Step over spaces and comments, so that an !ENDDEFINE inside a
+         comment is never considered. */
+      pos = skip_spaces_and_comments (input.string, len, true, pos);
+
+      /* Too little input left to hold the keyword. */
+      if (pos + keyword.length > len)
+        return SIZE_MAX;
+
+      const char ch = input.string[pos];
+      if (ch == '!'
+          && ss_equals_case (ss_substr (input, pos, keyword.length),
+                             keyword))
+        return pos;
+
+      /* Not the keyword: consume this character. */
+      pos++;
+      if (ch != '\'' && ch != '"')
+        continue;
+
+      /* The character opened a quoted string.  Consume everything through
+         the matching closing quote, so that a quoted !ENDDEFINE is
+         ignored. */
+      do
+        {
+          if (pos >= len)
+            return SIZE_MAX;
+        }
+      while (input.string[pos++] != ch);
+    }
+}
+
+/* We are in the body of a macro definition, looking for additional lines of
+   the body or !ENDDEFINE.
+
+   Works on one line at a time.  Returns -1 if INPUT does not yet contain a
+   complete line and EOF is false.  Otherwise stores the segment's type in
+   *TYPE and returns its length in bytes, which may be 0 for an empty
+   line. */
+static int
+segmenter_parse_define_4__ (struct segmenter *s,
+                            const char *input, size_t n, bool eof,
+                            enum segment_type *type)
+{
+  /* Gather a whole line, not counting its terminating "\n" or "\r\n".
+
+     The three cases are spelled out as statements rather than as one nested
+     conditional expression.  In a conditional expression the -1 would be
+     converted to size_t and then narrowed back to int, which is
+     implementation-defined. */
+  const char *newline = memchr (input, '\n', n);
+  int ofs;
+  if (newline)
+    ofs = newline - input - (newline > input && newline[-1] == '\r');
+  else if (eof)
+    ofs = n;
+  else
+    {
+      /* No newline yet and more input may follow. */
+      return -1;
+    }
+
+  /* Does the line contain !ENDDEFINE? */
+  size_t end = find_enddefine (ss_buffer (input, ofs));
+  if (end == SIZE_MAX)
+    {
+      /* No !ENDDEFINE.  We have a full line of macro body.
+
+         The line might be blank, whether completely empty or just spaces and
+         comments.  That's OK: we need to report blank lines because they can
+         have significance.
+
+         However, if the first line of the macro body (the same line as the
+         closing parenthesis in the argument definition) is blank, we just
+         report it as spaces because it's not significant. */
+      *type = (s->substate == 0 && is_all_spaces (input, ofs)
+               ? SEG_SPACES : SEG_MACRO_BODY);
+
+      /* Next, consume the newline; substate 1 records that the first body
+         line has now been seen. */
+      s->state = S_DEFINE_5;
+      s->substate = 1;
+      return ofs;
+    }
+
+  /* Macro ends at the !ENDDEFINE on this line. */
+  s->state = S_GENERAL;
+  s->substate = 0;
+  if (!end)
+    {
+      /* Line starts with !ENDDEFINE.  The state is already S_GENERAL, so
+         segmenter_push() handles it from here. */
+      return segmenter_push (s, input, n, eof, type);
+    }
+
+  /* Report everything before !ENDDEFINE as its own segment: spaces if that
+     is all there is, otherwise macro body content. */
+  *type = is_all_spaces (input, end) ? SEG_SPACES : SEG_MACRO_BODY;
+  return end;
+}
+
+static int
+segmenter_parse_define_5__ (struct segmenter *s,
+                            const char *input, size_t n, bool eof,
+                            enum segment_type *type)
+{
+  /* State S_DEFINE_5 is entered after a macro body line has been reported.
+     Parsing is delegated to segmenter_parse_newline__() (presumably the
+     line terminator; confirm against that helper).  On success the next
+     segment is handled in state S_DEFINE_4; a negative result is reported
+     as -1 and leaves the state unchanged. */
+  const int len = segmenter_parse_newline__ (input, n, eof, type);
+  if (len >= 0)
+    {
+      s->state = S_DEFINE_4;
+      return len;
+    }
+
+  return -1;
+}
+