From 86a735868a2c4cabf1dfc3971ba6d5d96e51daf0 Mon Sep 17 00:00:00 2001 From: Ben Pfaff Date: Tue, 15 Jun 2021 11:53:01 -0700 Subject: [PATCH] Mostly documentation update but some code clarification too. --- doc/flow-control.texi | 132 ++++++++++++++++----- src/language/lexer/macro.c | 193 ++++++++++++++++++++++++++++++- tests/language/control/define.at | 92 ++++++++++++++- 3 files changed, 383 insertions(+), 34 deletions(-) diff --git a/doc/flow-control.texi b/doc/flow-control.texi index 0b3cd27866..20644747db 100644 --- a/doc/flow-control.texi +++ b/doc/flow-control.texi @@ -74,8 +74,15 @@ The following functions may be used within the body: The body may also include the following constructs: -!IF (condition) !THEN true-expansion !ENDIF -!IF (condition) !THEN true-expansion !ELSE false-expansion !ENDIF + !IF (condition) !THEN true-expansion !IFEND + !IF (condition) !THEN true-expansion !ELSE false-expansion !IFEND + + !DO !var = start !TO end [!BY step] + body + !DOEND + !DO !var !IN (expression) + body + !DOEND @end display The DEFINE command defines a macro that can later be called any number @@ -550,6 +557,45 @@ to uppercase. @end example @end deffn +@node Macro Expressions +@subsection Macro Expressions + +Macro expressions are used in conditional expansion and loops, which +are described in the following sections. A macro expression may use +the following operators, listed in descending order of operator +precedence: + +@table @code +@item () +Parentheses override the default operator precedence. + +@item !EQ !NE !GT !LT !GE !LE = ~= <> > < >= <= +Relational operators compare their operands and yield a Boolean +result, either @samp{0} for false or @samp{1} for true. + +These operators always compare their operands as strings. This can be +surprising when the strings are numbers because, e.g.,@: @code{1 < +1.0} and @code{10 < 2} both evaluate to @samp{1} (true). + +Comparisons are case sensitive, so that @code{a = A} evaluates to +@samp{0} (false). 
+ +@item !NOT ~ +@itemx !AND & +@itemx !OR | +Logical operators interpret their operands as Boolean values, where +quoted or unquoted @samp{0} is false and anything else is true, and +yield a Boolean result, either @samp{0} for false or @samp{1} for +true. +@end table + +Macro expressions do not include any arithmetic operators. + +An operand in an expression may be a single token (including a macro +argument name) or a macro function invocation. Either way, the +expression evaluator unquotes the operand, so that @code{1 = '1'} is +true. + @node Macro Conditional Expansion @subsection Macro Conditional Expansion @@ -557,42 +603,71 @@ The @code{!IF} construct may be used inside a macro body to allow for conditional expansion. It takes the following forms: @example -!IF (condition) !THEN true-expansion !IFEND -!IF (condition) !THEN true-expansion !ELSE false-expansion !IFEND +!IF (@var{expression}) !THEN @var{true-expansion} !IFEND +!IF (@var{expression}) !THEN @var{true-expansion} !ELSE @var{false-expansion} !IFEND +@end example + +When @var{expression} evaluates to true, the macro processor expands +@var{true-expansion}; otherwise, it expands @var{false-expansion}, if +it is present. The macro processor considers quoted or unquoted +@samp{0} to be false, and anything else to be true. + +@node Macro Loops +@subsection Macro Loops + +The body of a macro may include two forms of loops: loops over +numerical ranges and loops over tokens. Both forms expand a @dfn{loop +body} multiple times, each time setting a named @dfn{loop variable} to +a different value. The loop body typically expands the loop variable +at least once. + +@subsubheading Loops Over Ranges + +@example +!DO @var{!var} = @var{start} !TO @var{end} [!BY @var{step}] + @var{body} +!DOEND @end example -When the @var{condition} evaluates to true, @var{true-expansion} is -expanded. When it evaluates to false, @var{false-expansion} is -expanded, if it is present. 
The unquoted value @samp{0} is considered -false, and all other values are considered true. +A loop over a numerical range has the form shown above. @var{start}, +@var{end}, and @var{step} (if included) must be expressions with +numeric values. The macro processor accepts both integers and real +numbers. The macro processor expands @var{body} for each numeric +value from @var{start} to @var{end}, inclusive. -Within @var{condition}, macros, macro arguments, and macro functions -are expanded. After expansion, the condition is evaluated as a macro -expression that may use only the following operators, in descending -order of operator precedence: +The default value for @var{step} is 1. If @var{step} is positive and +@math{@var{start} > @var{end}}, or if @var{step} is negative and +@math{@var{start} < @var{end}}, then the macro processor doesn't +expand the body at all. @var{step} may not be zero. + +@subsubheading Loops Over Tokens @example -() -!EQ !NE !GT !LT !GE !LE = ~= <> > < >= <= -!NOT ~ -!AND & -!OR | +!DO @var{!var} !IN (@var{expression}) + @var{body} +!DOEND @end example -All of these operators yield a Boolean result, either @samp{0} for -false or @samp{1} for true. +A loop over tokens takes the form shown above. The macro processor +evaluates @var{expression} and expands @var{body} once per token in +the result, substituting the token for @var{!var} each time it +appears. + +@node Macro Variable Assignment +@subsection Macro Variable Assignment -If an operand is a quoted string, then the operator considers the -contents of the quoted string; otherwise, it must be a single token. -Thus, @code{1 = '1'} is true, and @code{'a b' = a b} is in error -because the right-hand operand is two tokens. +The @code{!LET} construct evaluates an expression and assigns the +result to a macro variable. It may create a new macro variable or +change the value of one created by a previous @code{!LET} or +@code{!DO}, but it may not change the value of a macro argument. 
+@code{!LET} has the following form: -Comparisons in macro expressions are always string comparisons. This -can be surprising when the operands are numbers: e.g.@: @code{1 < 1.0} -and @code{10 < 2} both evaluate to @samp{1} (true). +@example +!LET @var{!var} = @var{expression} +@end example -Macro expression comparisons are case sensitive, so that @code{a = A} -evaluates to @samp{0} (false). +If @var{expression} is more than one token, it must be enclosed in +parentheses. @node Macro Settings @subsection Macro Settings @@ -634,6 +709,7 @@ Macros in comments. Macros in titles. +Define ``unquote.'' @node DO IF @section DO IF @vindex DO IF diff --git a/src/language/lexer/macro.c b/src/language/lexer/macro.c index 415b84bade..ca32800f30 100644 --- a/src/language/lexer/macro.c +++ b/src/language/lexer/macro.c @@ -34,6 +34,7 @@ #include "libpspp/string-map.h" #include "gl/c-ctype.h" +#include "gl/ftoastr.h" #include "gettext.h" #define _(msgid) gettext (msgid) @@ -103,7 +104,7 @@ macro_tokens_from_string (struct macro_tokens *mts, const struct substring src, }; struct state state = { - .segmenter = SEGMENTER_INIT (mode), + .segmenter = segmenter_init (mode, true), .body = src, }; struct state saved = state; @@ -821,7 +822,7 @@ parse_macro_function (struct parse_macro_function_ctx *ctx, if (!n_tokens || tokens[0].token.type != T_MACRO_ID - || !ss_equals_case (tokens[0].token.string, function)) + || !ss_equals_case (tokens[0].token.string, function)) /* XXX abbrevs allowed */ return false; if (n_tokens < 2 || tokens[1].token.type != T_LPAREN) @@ -1120,6 +1121,12 @@ macro_evaluate_literal (const struct expr_context *ctx, }; struct string function_output = DS_EMPTY_INITIALIZER; size_t function_consumed = parse_function_arg (&fctx, 0, &function_output); + struct string unquoted = DS_EMPTY_INITIALIZER; + if (unquote_string (ds_cstr (&function_output), &unquoted)) + { + ds_swap (&function_output, &unquoted); + ds_destroy (&unquoted); + } *tokens = p + function_consumed; return 
ds_steal_cstr (&function_output); } @@ -1315,6 +1322,34 @@ macro_evaluate_expression (const struct macro_token **tokens, size_t n_tokens, return macro_evaluate_or (&ctx, tokens, *tokens + n_tokens); } +static bool +macro_evaluate_number (const struct macro_token **tokens, size_t n_tokens, + int nesting_countdown, const struct macro_set *macros, + const struct macro_expander *me, struct string_map *vars, + bool *expand, double *number) +{ + char *s = macro_evaluate_expression (tokens, n_tokens, nesting_countdown, + macros, me, vars, expand); + if (!s) + return false; + + struct macro_tokens mts = { .n = 0 }; + macro_tokens_from_string (&mts, ss_cstr (s), SEG_MODE_INTERACTIVE /* XXX */); + if (mts.n != 1 || !token_is_number (&mts.mts[0].token)) + { + macro_tokens_print (&mts, stdout); + printf ("expression must evaluate to a number (not %s)\n", s); + free (s); + macro_tokens_uninit (&mts); + return false; + } + + *number = token_number (&mts.mts[0].token); + free (s); + macro_tokens_uninit (&mts); + return true; +} + static const struct macro_token * find_ifend_clause (const struct macro_token *p, const struct macro_token *end) { @@ -1460,6 +1495,156 @@ macro_parse_let (const struct macro_token *tokens, size_t n_tokens, return p - tokens; } +static const struct macro_token * +find_doend (const struct macro_token *p, const struct macro_token *end) +{ + size_t nesting = 0; + for (; p < end; p++) + { + if (p->token.type != T_MACRO_ID) + continue; + + if (ss_equals_case (p->token.string, ss_cstr ("!DO"))) + nesting++; + else if (ss_equals_case (p->token.string, ss_cstr ("!DOEND"))) + { + if (!nesting) + return p; + nesting--; + } + } + printf ("missing !DOEND\n"); + return NULL; +} + +/* Expands the !DO loop that starts at TOKENS[0], appending the result to EXP. + Handles both "!DO !var !IN (list)" and "!DO !var = first !TO last [!BY step]". + Returns the number of tokens consumed through !DOEND, or 0 if TOKENS is not a valid !DO loop. */ +static size_t +macro_expand_do (const struct macro_token *tokens, size_t n_tokens, + int nesting_countdown, const struct macro_set *macros, + const struct macro_expander *me, struct string_map *vars, + bool *expand, struct macro_tokens *exp) +{ + const struct macro_token *p = tokens; + 
const struct macro_token *end = tokens + n_tokens; + + if (p >= end || !ss_equals_case (p->token.string, ss_cstr ("!DO"))) + return 0; + p++; + + if (p >= end || p->token.type != T_MACRO_ID) + { + printf ("expected macro variable name following !DO\n"); + return 0; + } + const struct substring var_name = p->token.string; + p++; + + if (p < end && p->token.type == T_MACRO_ID + && ss_equals_case (p->token.string, ss_cstr ("!IN"))) + { + p++; + char *list = macro_evaluate_expression (&p, end - p, + nesting_countdown, macros, me, vars, + expand); + if (!list) + return 0; + + struct macro_tokens items = { .n = 0 }; + macro_tokens_from_string (&items, ss_cstr (list), + SEG_MODE_INTERACTIVE /* XXX */); + free (list); + + const struct macro_token *do_end = find_doend (p, end); + if (!do_end) + { + macro_tokens_uninit (&items); + return 0; + } + + const struct macro_tokens inner = { + .mts = CONST_CAST (struct macro_token *, p), + .n = do_end - p + }; + for (size_t i = 0; i < items.n; i++) + { + string_map_replace_nocopy (vars, ss_xstrdup (var_name), + ss_xstrdup (items.mts[i].representation)); + macro_expand (&inner, nesting_countdown, macros, + me, vars, expand, exp); + } + /* Each item was copied into VARS, so ITEMS can be freed now. */ + macro_tokens_uninit (&items); + return do_end - tokens + 1; + } + else if (p < end && p->token.type == T_EQUALS) + { + p++; + double first; + if (!macro_evaluate_number (&p, end - p, nesting_countdown, macros, me, + vars, expand, &first)) + return 0; + + if (p >= end || p->token.type != T_MACRO_ID + || !ss_equals_case (p->token.string, ss_cstr ("!TO"))) + { + printf ("expecting !TO\n"); + return 0; + } + p++; + + double last; + if (!macro_evaluate_number (&p, end - p, nesting_countdown, macros, me, + vars, expand, &last)) + return 0; + + double by = 1.0; + if (p < end && p->token.type == T_MACRO_ID + && ss_equals_case (p->token.string, ss_cstr ("!BY"))) + { + p++; + if (!macro_evaluate_number (&p, end - p, nesting_countdown, macros, me, + vars, expand, &by)) + return 0; + + if (by == 0.0) + { + printf ("!BY value cannot be zero\n"); 
+ return 0; + } + } + + const struct macro_token *do_end = find_doend (p, end); + if (!do_end) + return 0; + const struct macro_tokens inner = { + .mts = CONST_CAST (struct macro_token *, p), + .n = do_end - p + }; + + if ((by > 0 && first <= last) || (by < 0 && first >= last)) + for (double index = first; + by > 0 ? (index <= last) : (index >= last); + index += by) + { + char index_s[DBL_BUFSIZE_BOUND]; + c_dtoastr (index_s, sizeof index_s, 0, 0, index); + string_map_replace_nocopy (vars, ss_xstrdup (var_name), + xstrdup (index_s)); + macro_expand (&inner, nesting_countdown, macros, + me, vars, expand, exp); + } + + return do_end - tokens + 1; + } + else + { + printf ("expecting = or !IN in !DO loop\n"); + return 0; + } +} + static void macro_expand (const struct macro_tokens *mts, int nesting_countdown, const struct macro_set *macros, @@ -1599,6 +1784,15 @@ macro_expand (const struct macro_tokens *mts, continue; } + n = macro_expand_do (&mts->mts[i], mts->n - i, + nesting_countdown, macros, me, vars, + expand, exp); + if (n > 0) + { + i += n - 1; + continue; + } + if (ss_equals_case (token->string, ss_cstr ("!onexpand"))) *expand = true; else if (ss_equals_case (token->string, ss_cstr ("!offexpand"))) diff --git a/tests/language/control/define.at b/tests/language/control/define.at index 0571303da1..41557ca711 100644 --- a/tests/language/control/define.at +++ b/tests/language/control/define.at @@ -741,18 +741,102 @@ DEBUG EXPAND. ]) AT_CHECK([pspp --testing-mode define.sps], [0], [dnl v1 = x0y. -v2 = 'x0y'. +v2 = x0y. v3 = 1. v4 = 1. v1 = xy. -v2 = 'xy'. +v2 = xy. v3 = 0. v4 = 1. v1 = xxyzzyy. -v2 = 'xxyzzyy'. +v2 = xxyzzyy. v3 = 0. v4 = 0. ]) -AT_CLEANUP \ No newline at end of file +AT_CLEANUP + +AT_SETUP([macro indexed !DO]) +AT_KEYWORDS([index do]) +AT_DATA([define.sps], [dnl +DEFINE !title(!POS !TOKENS(1)) !1. !ENDDEFINE. + +DEFINE !for(!POS !TOKENS(1) / !POS !TOKENS(1)) +!DO !var = !1 !TO !2 !var !DOEND. +!ENDDEFINE. 
+ +DEFINE !forby(!POS !TOKENS(1) / !POS !TOKENS(1) / !POS !TOKENS(1)) +!DO !var = !1 !TO !2 !BY !3 !var !DOEND. +!ENDDEFINE. + +DEBUG EXPAND. +!title "increasing". +!for 1 5. +!forby 1 5 1. +!forby 1 5 2. +!forby 1 5 2.5. +!forby 1 5 -1. + +!title "decreasing". +!for 5 1. +!forby 5 1 1. +!forby 5 1 -1. +!forby 5 1 -2. +!forby 5 1 -3. + +!title "non-integer". +!for 1.5 3.5. +]) +AT_CHECK([pspp --testing-mode define.sps], [0], [dnl +"increasing". + +1 2 3 4 5. + +1 2 3 4 5. + +1 3 5. + +1 3.5. + +. + +"decreasing". + +. + +. + +5 4 3 2 1. + +5 3 1. + +5 2. + +"non-integer". + +1.5 2.5 3.5. +]) +AT_CLEANUP + +AT_SETUP([macro list !DO]) +AT_KEYWORDS([index do]) +AT_DATA([define.sps], [dnl +DEFINE !for(!POS !CMDEND) +(!DO !i !IN (!1) (!i) !DOEND). +!ENDDEFINE. + +DEBUG EXPAND. +!for a b c. +!for 'foo bar baz quux'. +!for. +]) +AT_CHECK([pspp --testing-mode define.sps], [0], [dnl +( (a) (b) (c) ). + +( (foo) (bar) (baz) (quux) ). + +( ). +]) +AT_CLEANUP + -- 2.30.2