1 /* PSPP - a program for statistical analysis.
2 Copyright (C) 2021 Free Software Foundation, Inc.
4 This program is free software: you can redistribute it and/or modify
5 it under the terms of the GNU General Public License as published by
6 the Free Software Foundation, either version 3 of the License, or
7 (at your option) any later version.
9 This program is distributed in the hope that it will be useful,
10 but WITHOUT ANY WARRANTY; without even the implied warranty of
11 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 GNU General Public License for more details.
14 You should have received a copy of the GNU General Public License
15 along with this program. If not, see <http://www.gnu.org/licenses/>. */
21 #include "language/command.h"
22 #include "language/lexer/lexer.h"
23 #include "language/lexer/macro.h"
24 #include "language/lexer/scan.h"
25 #include "language/lexer/token.h"
26 #include "libpspp/intern.h"
27 #include "libpspp/message.h"
29 #include "gl/xalloc.h"
32 #define _(msgid) gettext (msgid)
/* Tries to match KEYWORD against the current token in LEXER.

   A KEYWORD that does not start with '!' is handed straight to
   lex_match_id().  A KEYWORD that starts with '!' is only compared when the
   current token is a T_MACRO_ID, using lex_id_match_n() on the token text.

   NOTE(review): this listing omits some original lines (the embedded line
   numbers skip), including the return type and everything that follows the
   T_MACRO_ID test, so what is returned on that path is not visible here. */
35 match_macro_id (struct lexer *lexer, const char *keyword)
/* Ordinary keywords take the regular identifier-matching path. */
37 if (keyword[0] != '!')
38 return lex_match_id (lexer, keyword);
/* '!' keywords: the literal 4 is passed as the last argument of
   lex_id_match_n() (presumably the minimum abbreviation length; confirm
   against its declaration). */
39 else if (lex_token (lexer) == T_MACRO_ID
40 && lex_id_match_n (ss_cstr (keyword), lex_tokss (lexer), 4))
49 /* Obtains a quoted string from LEXER and then tokenizes the quoted string's
50 content to obtain a single TOKEN. Returns true if successful, false
51 otherwise. The caller takes ownership of TOKEN on success, otherwise TOKEN
/* Reads the string token at the current position of LEXER and tokenizes its
   content into *TOKEN, reporting an error unless the content is exactly one
   token.

   NOTE(review): the return statements and any cleanup of *TOKEN are on lines
   this listing does not show. */
54 parse_quoted_token (struct lexer *lexer, struct token *token)
/* The current token has to pass lex_force_string(); the statement executed
   when it does not is not shown. */
56 if (!lex_force_string (lexer))
/* Run a separate string lexer over the content of the string token. */
59 struct substring s = lex_tokss (lexer);
60 struct string_lexer slex;
61 string_lexer_init (&slex, s.string, s.length, SEG_MODE_INTERACTIVE, true);
/* ANOTHER_TOKEN receives whatever might follow the first token.  It starts
   as T_STOP because the second string_lexer_next() call is skipped (by ||)
   when the first one already failed, yet it is still passed to
   token_uninit() below. */
62 struct token another_token = { .type = T_STOP };
/* Success needs the first read to produce a token and the second read to
   hit the end of the string. */
63 if (string_lexer_next (&slex, token) != SLR_TOKEN
64 || string_lexer_next (&slex, &another_token) != SLR_END)
67 token_uninit (&another_token);
68 lex_error (lexer, _("String must contain exactly one token."));
/* Helper for the argument-type keywords (!TOKENS, !CHAREND, !ENCLOSE,
   !CMDEND): cmd_define() calls it once per such keyword, passing the same
   SAW_ARG_TYPE flag, and abandons the keyword when it returns false.

   NOTE(review): only the error report is visible in this listing.  The test
   that decides when the error is raised, any update of *SAW_ARG_TYPE, and
   the return statements are on omitted lines. */
76 dup_arg_type (struct lexer *lexer, bool *saw_arg_type)
/* Both token offsets are -1 (presumably the token just consumed, i.e. the
   duplicate keyword; confirm against lex_next_error()). */
80 lex_next_error (lexer, -1, -1,
81 _("Only one of !TOKENS, !CHAREND, !ENCLOSE, or "
82 "!CMDEND is allowed."));
/* Parses the body of a macro definition from LEXER into *MTS.

   The body is read as a sequence of T_STRING tokens, up to a token that
   matches !ENDDEFINE.  The text of each token is appended to one buffer,
   followed by a newline, and the buffer is then split into segments and
   turned into macro tokens.

   NOTE(review): this listing omits lines (the embedded line numbers skip),
   including the loop header around the segmenter, the dispatch on the
   tokenization result, cleanup of BODY and the return statements. */
93 parse_macro_body (struct lexer *lexer, struct macro_tokens *mts)
/* Start from an empty token list; the compound literal zeroes every member
   that is not named. */
95 *mts = (struct macro_tokens) { .n = 0 };
96 struct string body = DS_EMPTY_INITIALIZER;
/* Position of the first body token, used below to compute message
   locations inside BODY. */
97 struct msg_point start = lex_ofs_start_point (lexer, lex_ofs (lexer));
98 while (!match_macro_id (lexer, "!ENDDEFINE"))
/* Anything other than a string token here is a syntax error. */
100 if (lex_token (lexer) != T_STRING)
103 _("Syntax error expecting macro body or !ENDDEFINE."));
/* Accumulate this token's text plus a newline. */
108 ds_put_substring (&body, lex_tokss (lexer));
109 ds_put_byte (&body, '\n');
/* Re-segment the accumulated text using the lexer's current syntax mode. */
113 struct segmenter segmenter = segmenter_init (lex_get_syntax_mode (lexer),
/* P is the part of BODY that has not been consumed yet. */
115 struct substring p = body.ss;
119 enum segment_type type;
/* The whole remaining text is offered to the segmenter with `true' as the
   fourth argument (presumably the end-of-input flag; confirm against
   segmenter_push()).  A negative length is treated as impossible. */
120 int seg_len = segmenter_push (&segmenter, p.string,
121 p.length, true, &type);
122 assert (seg_len >= 0);
/* MT.syntax is the first SEG_LEN bytes of P, i.e. a view into BODY rather
   than a copy. */
124 struct macro_token mt = {
125 .token = { .type = T_STOP },
126 .syntax = ss_head (p, seg_len),
128 enum tokenize_result result
129 = token_from_segment (type, mt.syntax, &mt.token);
130 ss_advance (&p, seg_len);
/* NOTE(review): which values of RESULT lead to the add below and which to
   the error report further down is decided on lines not shown. */
138 macro_tokens_add (mts, &mt);
/* Error report: locate the offending segment inside BODY.  END_OFFSET is
   the offset of the segment's last byte, or START_OFFSET itself for an
   empty segment. */
143 size_t start_offset = mt.syntax.string - body.ss.string;
144 size_t end_offset = start_offset + (mt.syntax.length ? mt.syntax.length - 1 : 0);
/* Both points are derived from START by advancing it over the leading
   START_OFFSET or END_OFFSET bytes of BODY. */
146 const struct msg_location loc = {
147 .file_name = intern_new_if_nonnull (lex_get_file_name (lexer)),
148 .start = msg_point_advance (start, ss_buffer (body.ss.string, start_offset)),
149 .end = msg_point_advance (start, ss_buffer (body.ss.string, end_offset)),
150 .src = CONST_CAST (struct lex_source *, lex_source (lexer)),
/* The token's own string is used as the message text. */
152 msg_at (SE, &loc, "%s", mt.token.string.string);
/* Drop the file-name reference taken for LOC above. */
153 intern_unref (loc.file_name);
160 token_uninit (&mt.token);
/* Parses a macro definition from LEXER and hands the result to
   lex_define_macro().

   From the lines visible here, the steps are: take the macro name from the
   current T_STRING token; allocate a struct macro; parse a parenthesized
   list of parameters, each either !POSITIONAL or named by an identifier and
   each accepting the keywords !DEFAULT, !NOEXPAND, !TOKENS, !CHAREND,
   !ENCLOSE and !CMDEND; parse the body with parse_macro_body(); record the
   first and last line of the definition; define the macro.

   DS is unused.

   NOTE(review): this listing omits lines (the embedded original line numbers
   skip), including the return type, braces, the body of every error path
   and all return statements, so cleanup and return values cannot be
   described from here. */
167 cmd_define (struct lexer *lexer, struct dataset *ds UNUSED)
171 The macro name is a T_STRING token, even though it's an identifier,
172 because that's the way that the segmenter prevents it from getting
174 if (lex_token (lexer) != T_STRING)
176 lex_error (lexer, _("Syntax error expecting identifier."));
/* NAME is the text of the current token.  One leading '!' is skipped
   before the plausibility check. */
179 const char *name = lex_tokcstr (lexer);
180 if (!id_is_plausible (name + (name[0] == '!')))
182 lex_error (lexer, _("Syntax error expecting identifier."));
/* Start a new macro holding its own copy of NAME; the compound literal
   zeroes every other member. */
186 struct macro *m = xmalloc (sizeof *m);
187 *m = (struct macro) { .name = xstrdup (name) };
/* Remembered for the location filled in at the end. */
188 struct msg_point macro_start = lex_ofs_start_point (lexer, lex_ofs (lexer));
/* The parameter list must open with a left parenthesis. */
191 if (!lex_force_match (lexer, T_LPAREN))
/* Capacity of m->params, grown on demand by x2nrealloc(). */
194 size_t allocated_params = 0;
/* One iteration per parameter, until the closing parenthesis. */
196 while (!lex_match (lexer, T_RPAREN))
198 if (m->n_params >= allocated_params)
199 m->params = x2nrealloc (m->params, &allocated_params,
/* Claim the next slot.  Arguments are expanded by default; !NOEXPAND
   below turns that off. */
202 size_t param_index = m->n_params++;
203 struct macro_param *p = &m->params[param_index];
204 *p = (struct macro_param) { .expand_arg = true };
206 /* Parse parameter name. */
207 if (match_macro_id (lexer, "!POSITIONAL"))
/* A positional parameter directly after a non-positional one is an error;
   the note points back at KEYWORD_OFS, the offset recorded below for the
   first keyword parameter. */
209 if (param_index > 0 && !m->params[param_index - 1].positional)
211 lex_next_error (lexer, -1, -1,
212 _("Positional parameters must precede "
213 "keyword parameters."));
214 lex_ofs_msg (lexer, SN, keyword_ofs, keyword_ofs,
215 _("Here is a previous keyword parameter."));
/* Positional parameters are named "!1", "!2", ... by 1-based position. */
219 p->positional = true;
220 p->name = xasprintf ("!%zu", param_index + 1);
/* Keyword parameter.  Record the token offset of the first one only
   (KEYWORD_OFS is declared on a line not shown). */
224 if (keyword_ofs == 0)
225 keyword_ofs = lex_ofs (lexer);
226 if (lex_token (lexer) == T_MACRO_ID)
228 lex_error (lexer, _("Keyword macro parameter must be named in "
229 "definition without \"!\" prefix."));
232 if (!lex_force_id (lexer))
235 if (is_macro_keyword (lex_tokss (lexer)))
237 lex_error (lexer, _("Cannot use macro keyword \"%s\" "
238 "as an argument name."),
239 lex_tokcstr (lexer));
/* The stored name carries a '!' prefix even though the definition spells
   it without one. */
243 p->positional = false;
244 p->name = xasprintf ("!%s", lex_tokcstr (lexer));
/* An equals sign is accepted here but not required: the result of
   lex_match() is ignored. */
247 lex_match (lexer, T_EQUALS);
/* Per-parameter flags for detecting repeated !DEFAULT and repeated
   argument-type keywords. */
249 bool saw_default = false;
250 bool saw_arg_type = false;
253 if (match_macro_id (lexer, "!DEFAULT"))
259 _("!DEFAULT is allowed only once per argument."));
264 if (!lex_force_match (lexer, T_LPAREN))
267 /* XXX Should this handle balanced inner parentheses? */
/* Collect default-value tokens up to the first right parenthesis. */
268 while (!lex_match (lexer, T_RPAREN))
/* End of command before the closing parenthesis is an error. */
270 if (lex_token (lexer) == T_ENDCMD)
272 lex_error_expecting (lexer, ")");
/* Each default token is stored together with its source representation.
   NOTE(review): the token advance and any release of SYNTAX are on lines
   not shown. */
275 char *syntax = lex_next_representation (lexer, 0, 0);
276 const struct macro_token mt = {
277 .token = *lex_next (lexer, 0),
278 .syntax = ss_cstr (syntax),
280 macro_tokens_add (&p->def, &mt);
286 else if (match_macro_id (lexer, "!NOEXPAND"))
287 p->expand_arg = false;
/* !TOKENS (N): N must be an integer from 1 to INT_MAX. */
288 else if (match_macro_id (lexer, "!TOKENS"))
290 if (!dup_arg_type (lexer, &saw_arg_type)
291 || !lex_force_match (lexer, T_LPAREN)
292 || !lex_force_int_range (lexer, "!TOKENS", 1, INT_MAX))
294 p->arg_type = ARG_N_TOKENS;
295 p->n_tokens = lex_integer (lexer);
297 if (!lex_force_match (lexer, T_RPAREN))
/* !CHAREND ('token'): one quoted token, stored in p->end. */
300 else if (match_macro_id (lexer, "!CHAREND"))
302 if (!dup_arg_type (lexer, &saw_arg_type))
305 p->arg_type = ARG_CHAREND;
307 if (!lex_force_match (lexer, T_LPAREN)
308 || !parse_quoted_token (lexer, &p->end)
309 || !lex_force_match (lexer, T_RPAREN))
/* !ENCLOSE ('start', 'end'): two quoted tokens separated by a comma,
   stored in p->start and p->end. */
312 else if (match_macro_id (lexer, "!ENCLOSE"))
314 if (!dup_arg_type (lexer, &saw_arg_type))
317 p->arg_type = ARG_ENCLOSE;
319 if (!lex_force_match (lexer, T_LPAREN)
320 || !parse_quoted_token (lexer, &p->start)
321 || !lex_force_match (lexer, T_COMMA)
322 || !parse_quoted_token (lexer, &p->end)
323 || !lex_force_match (lexer, T_RPAREN))
/* !CMDEND takes no operands. */
326 else if (match_macro_id (lexer, "!CMDEND"))
328 if (!dup_arg_type (lexer, &saw_arg_type))
331 p->arg_type = ARG_CMDEND;
338 lex_error_expecting (lexer, "!TOKENS", "!CHAREND", "!ENCLOSE",
/* Unless the list is about to close, a slash must follow. */
343 if (lex_token (lexer) != T_RPAREN && !lex_force_match (lexer, T_SLASH))
347 if (!parse_macro_body (lexer, &m->body))
/* MACRO_END is the end point of the token one before the current offset.
   Only the line members of the start and end points are filled in below. */
350 struct msg_point macro_end = lex_ofs_end_point (lexer, lex_ofs (lexer) - 1);
351 m->location = xmalloc (sizeof *m->location);
352 *m->location = (struct msg_location) {
353 .file_name = intern_new_if_nonnull (lex_get_file_name (lexer)),
354 .start = { .line = macro_start.line },
355 .end = { .line = macro_end.line },
/* Hand the finished macro to the lexer (presumably transferring ownership
   of M; confirm against lex_define_macro()). */
358 lex_define_macro (lexer, m);
/* Debugging command: scans the tokens in LEXER until T_STOP and emits an
   "unexpanded token" note for each token that lex_next_is_from_macro() does
   not report as coming from a macro and that is not T_ENDCMD.  DS is unused.

   NOTE(review): the listing stops before the end of this function, so the
   token advance, any release of REP and the return value are not visible. */
368 cmd_debug_expand (struct lexer *lexer, struct dataset *ds UNUSED)
/* Calls settings_set_mprint() with true (presumably turning on printing of
   macro expansions; confirm against the settings module). */
370 settings_set_mprint (true);
372 while (lex_token (lexer) != T_STOP)
374 if (!lex_next_is_from_macro (lexer, 0) && lex_token (lexer) != T_ENDCMD)
/* REP is the source representation of the current token. */
376 char *rep = lex_next_representation (lexer, 0, 0);
377 msg (MN, "unexpanded token \"%s\"", rep);