#include "data/procedure.h"
#include "data/settings.h"
#include "data/variable.h"
+#include "language/lexer/command-name.h"
#include "language/lexer/lexer.h"
#include "language/prompt.h"
#include "libpspp/assertion.h"
static bool in_correct_state (const struct command *, enum cmd_state);
static bool report_state_mismatch (const struct command *, enum cmd_state);
-static const struct command *find_command (const char *name);
static void set_completion_state (enum cmd_state);
\f
/* Command parser. */
return result;
}
-static size_t
-match_strings (const char *a, size_t a_len,
- const char *b, size_t b_len)
-{
- size_t match_len = 0;
-
- while (a_len > 0 && b_len > 0)
- {
- /* Mismatch always returns zero. */
- if (toupper ((unsigned char) *a++) != toupper ((unsigned char) *b++))
- return 0;
-
- /* Advance. */
- a_len--;
- b_len--;
- match_len++;
- }
-
- return match_len;
-}
-
-/* Returns the first character in the first word in STRING,
- storing the word's length in *WORD_LEN. If no words remain,
- returns a null pointer and stores 0 in *WORD_LEN. Words are
- sequences of alphanumeric characters or single
- non-alphanumeric characters. Words are delimited by
- spaces. */
-static const char *
-find_word (const char *string, size_t *word_len)
-{
- /* Skip whitespace and asterisks. */
- while (isspace ((unsigned char) *string))
- string++;
-
- /* End of string? */
- if (*string == '\0')
- {
- *word_len = 0;
- return NULL;
- }
-
- /* Special one-character word? */
- if (!isalnum ((unsigned char) *string))
- {
- *word_len = 1;
- return string;
- }
-
- /* Alphanumeric word. */
- *word_len = 1;
- while (isalnum ((unsigned char) string[*word_len]))
- (*word_len)++;
-
- return string;
-}
-
-/* Returns true if strings A and B can be confused based on
- their first three letters. */
-static bool
-conflicting_3char_prefixes (const char *a, const char *b)
-{
- size_t aw_len, bw_len;
- const char *aw, *bw;
-
- aw = find_word (a, &aw_len);
- bw = find_word (b, &bw_len);
- assert (aw != NULL && bw != NULL);
-
- /* Words that are the same don't conflict. */
- if (aw_len == bw_len && !buf_compare_case (aw, bw, aw_len))
- return false;
-
- /* Words that are otherwise the same in the first three letters
- do conflict. */
- return ((aw_len > 3 && bw_len > 3)
- || (aw_len == 3 && bw_len > 3)
- || (bw_len == 3 && aw_len > 3)) && !buf_compare_case (aw, bw, 3);
-}
-
-/* Returns true if CMD can be confused with another command
- based on the first three letters of its first word. */
-static bool
-conflicting_3char_prefix_command (const struct command *cmd)
-{
- assert (cmd >= commands && cmd < commands + command_cnt);
-
- return ((cmd > commands
- && conflicting_3char_prefixes (cmd[-1].name, cmd[0].name))
- || (cmd < commands + command_cnt
- && conflicting_3char_prefixes (cmd[0].name, cmd[1].name)));
-}
-
-/* Ways that a set of words can match a command name. */
-enum command_match
- {
- MISMATCH, /* Not a match. */
- PARTIAL_MATCH, /* The words begin the command name. */
- COMPLETE_MATCH /* The words are the command name. */
- };
-
-/* Figures out how well the WORD_CNT words in WORDS match CMD,
- and returns the appropriate enum value. If WORDS are a
- partial match for CMD and the next word in CMD is a dash, then
- *DASH_POSSIBLE is set to 1 if DASH_POSSIBLE is non-null;
- otherwise, *DASH_POSSIBLE is unchanged. */
-static enum command_match
-cmd_match_words (const struct command *cmd,
- char *const words[], size_t word_cnt,
- int *dash_possible)
-{
- const char *word;
- size_t word_len;
- size_t word_idx;
-
- for (word = find_word (cmd->name, &word_len), word_idx = 0;
- word != NULL && word_idx < word_cnt;
- word = find_word (word + word_len, &word_len), word_idx++)
- if (word_len != strlen (words[word_idx])
- || buf_compare_case (word, words[word_idx], word_len))
- {
- size_t match_chars = match_strings (word, word_len,
- words[word_idx],
- strlen (words[word_idx]));
- if (match_chars == 0)
- {
- /* Mismatch. */
- return MISMATCH;
- }
- else if (match_chars == 1 || match_chars == 2)
- {
- /* One- and two-character abbreviations are not
- acceptable. */
- return MISMATCH;
- }
- else if (match_chars == 3)
- {
- /* Three-character abbreviations are acceptable
- in the first word of a command if there are
- no name conflicts. They are always
- acceptable after the first word. */
- if (word_idx == 0 && conflicting_3char_prefix_command (cmd))
- return MISMATCH;
- }
- else /* match_chars > 3 */
- {
- /* Four-character and longer abbreviations are
- always acceptable. */
- }
- }
-
- if (word == NULL && word_idx == word_cnt)
- {
- /* cmd->name = "FOO BAR", words[] = {"FOO", "BAR"}. */
- return COMPLETE_MATCH;
- }
- else if (word == NULL)
- {
- /* cmd->name = "FOO BAR", words[] = {"FOO", "BAR", "BAZ"}. */
- return MISMATCH;
- }
- else
- {
- /* cmd->name = "FOO BAR BAZ", words[] = {"FOO", "BAR"}. */
- if (word[0] == '-' && dash_possible != NULL)
- *dash_possible = 1;
- return PARTIAL_MATCH;
- }
-}
-
-/* Returns the number of commands for which the WORD_CNT words in
- WORDS are a partial or complete match. If some partial match
- has a dash as the next word, then *DASH_POSSIBLE is set to 1,
- otherwise it is set to 0. */
static int
-count_matching_commands (char *const words[], size_t word_cnt,
- int *dash_possible)
+find_best_match (struct substring s, const struct command **matchp)
{
const struct command *cmd;
- int cmd_match_count;
+ struct command_matcher cm;
+ int missing_words;
- cmd_match_count = 0;
- *dash_possible = 0;
- for (cmd = commands; cmd < commands + command_cnt; cmd++)
- if (cmd_match_words (cmd, words, word_cnt, dash_possible) != MISMATCH)
- cmd_match_count++;
+ command_matcher_init (&cm, s);
+ for (cmd = commands; cmd < &commands[command_cnt]; cmd++)
+ command_matcher_add (&cm, ss_cstr (cmd->name), CONST_CAST (void *, cmd));
- return cmd_match_count;
-}
-
-/* Returns the command for which the WORD_CNT words in WORDS are
- a complete match. Returns a null pointer if no such command
- exists. */
-static const struct command *
-get_complete_match (char *const words[], size_t word_cnt)
-{
- const struct command *cmd;
+ *matchp = command_matcher_get_match (&cm);
+ missing_words = command_matcher_get_missing_words (&cm);
- for (cmd = commands; cmd < commands + command_cnt; cmd++)
- if (cmd_match_words (cmd, words, word_cnt, NULL) == COMPLETE_MATCH)
- return cmd;
+ command_matcher_destroy (&cm);
- return NULL;
-}
-
-/* Returns the command with the given exact NAME.
- Aborts if no such command exists. */
-static const struct command *
-find_command (const char *name)
-{
- const struct command *cmd;
-
- for (cmd = commands; cmd < commands + command_cnt; cmd++)
- if (!strcmp (cmd->name, name))
- return cmd;
- NOT_REACHED ();
-}
-
-/* Frees the WORD_CNT words in WORDS. */
-static void
-free_words (char *words[], size_t word_cnt)
-{
- size_t idx;
-
- for (idx = 0; idx < word_cnt; idx++)
- free (words[idx]);
-}
-
-/* Flags an error that the command whose name is given by the
- WORD_CNT words in WORDS is unknown. */
-static void
-unknown_command_error (struct lexer *lexer, char *const words[], size_t word_cnt)
-{
- if (word_cnt == 0)
- lex_error (lexer, _("expecting command name"));
- else
- {
- struct string s;
- size_t i;
-
- ds_init_empty (&s);
- for (i = 0; i < word_cnt; i++)
- {
- if (i != 0)
- ds_put_byte (&s, ' ');
- ds_put_cstr (&s, words[i]);
- }
-
- msg (SE, _("Unknown command %s."), ds_cstr (&s));
-
- ds_destroy (&s);
- }
+ return missing_words;
}
/* Parse the command name and return a pointer to the corresponding
static const struct command *
parse_command_name (struct lexer *lexer)
{
- char *words[16];
- int word_cnt;
- int complete_word_cnt;
- int dash_possible;
-
- if (lex_token (lexer) == T_EXP ||
- lex_token (lexer) == '*' || lex_token (lexer) == '[')
- return find_command ("COMMENT");
-
- dash_possible = 0;
- word_cnt = complete_word_cnt = 0;
- while (lex_token (lexer) == T_ID || (dash_possible && lex_token (lexer) == '-'))
- {
- int cmd_match_cnt;
+ const struct command *command;
+ int missing_words;
+ struct string s;
- assert (word_cnt < sizeof words / sizeof *words);
- if (lex_token (lexer) == T_ID)
- {
- words[word_cnt] = ds_xstrdup (lex_tokstr (lexer));
- str_uppercase (words[word_cnt]);
- }
- else if (lex_token (lexer) == '-')
- words[word_cnt] = xstrdup ("-");
- word_cnt++;
+ if (lex_token (lexer) == T_EXP
+ || lex_token (lexer) == '*'
+ || lex_token (lexer) == '[')
+ {
+ static const struct command c = { S_ANY, 0, "COMMENT", cmd_comment };
+ return &c;
+ }
- cmd_match_cnt = count_matching_commands (words, word_cnt,
- &dash_possible);
- if (cmd_match_cnt == 0)
- break;
- else if (cmd_match_cnt == 1)
+ command = NULL;
+ missing_words = 0;
+ ds_init_empty (&s);
+ for (;;)
+ {
+ if (lex_token (lexer) == '-')
+ ds_put_byte (&s, '-');
+ else if (lex_token (lexer) == T_ID)
{
- const struct command *command = get_complete_match (words, word_cnt);
- if (command != NULL)
- {
- if (!(command->flags & F_KEEP_FINAL_TOKEN))
- lex_get (lexer);
- free_words (words, word_cnt);
- return command;
- }
+ if (!ds_is_empty (&s) && ds_last (&s) != '-')
+ ds_put_byte (&s, ' ');
+ ds_put_cstr (&s, lex_tokid (lexer));
}
- else /* cmd_match_cnt > 1 */
+ else if (lex_is_integer (lexer) && lex_integer (lexer) >= 0)
{
- /* Do we have a complete command name so far? */
- if (get_complete_match (words, word_cnt) != NULL)
- complete_word_cnt = word_cnt;
+ if (!ds_is_empty (&s) && ds_last (&s) != '-')
+ ds_put_byte (&s, ' ');
+ ds_put_format (&s, "%ld", lex_integer (lexer));
}
+ else
+ break;
+
+ missing_words = find_best_match (ds_ss (&s), &command);
+ if (missing_words <= 0)
+ break;
+
lex_get (lexer);
}
- /* If we saw a complete command name earlier, drop back to
- it. */
- if (complete_word_cnt)
+ if (command == NULL && missing_words > 0)
{
- int pushback_word_cnt;
- const struct command *command;
-
- /* Get the command. */
- command = get_complete_match (words, complete_word_cnt);
- assert (command != NULL);
-
- /* Figure out how many words we want to keep.
- We normally want to swallow the entire command. */
- pushback_word_cnt = complete_word_cnt + 1;
- if (command->flags & F_KEEP_FINAL_TOKEN)
- pushback_word_cnt--;
-
- /* FIXME: We only support one-token pushback. */
- assert (pushback_word_cnt + 1 >= word_cnt);
-
- while (word_cnt > pushback_word_cnt)
- {
- word_cnt--;
- if (strcmp (words[word_cnt], "-"))
- lex_put_back_id (lexer, words[word_cnt]);
- else
- lex_put_back (lexer, '-');
- free (words[word_cnt]);
- }
+ ds_put_cstr (&s, " .");
+ missing_words = find_best_match (ds_ss (&s), &command);
+ ds_truncate (&s, ds_length (&s) - 2);
+ }
- free_words (words, word_cnt);
- return command;
+ if (command == NULL)
+ {
+ if (ds_is_empty (&s))
+ lex_error (lexer, _("expecting command name"));
+ else
+ msg (SE, _("Unknown command `%s'."), ds_cstr (&s));
+ }
+ else if (missing_words == 0)
+ {
+ if (!(command->flags & F_KEEP_FINAL_TOKEN))
+ lex_get (lexer);
+ }
+ else if (missing_words < 0)
+ {
+ assert (missing_words == -1);
+ assert (!(command->flags & F_KEEP_FINAL_TOKEN));
}
- /* We didn't get a valid command name. */
- unknown_command_error (lexer, words, word_cnt);
- free_words (words, word_cnt);
- return NULL;
+ ds_destroy (&s);
+ return command;
}
/* Returns true if COMMAND is allowed in STATE,
data_in_imply_decimals (s, LEGACY_NATIVE, f->type, f->d, &out);
else
{
- msg (SE, "Cannot parse \"%.*s\" as format %s: %s",
+ msg (SE, "Cannot parse `%.*s' as format %s: %s",
(int) s.length, s.string, fmt_name (f->type), error);
free (error);
}
language_lexer_sources = \
- src/language/lexer/lexer.c src/language/lexer/lexer.h \
+ src/language/lexer/command-name.c \
+ src/language/lexer/command-name.h \
+ src/language/lexer/lexer.c \
+ src/language/lexer/lexer.h \
src/language/lexer/subcommand-list.c \
src/language/lexer/subcommand-list.h \
src/language/lexer/format-parser.c \
--- /dev/null
+/* PSPP - a program for statistical analysis.
+ Copyright (C) 2010 Free Software Foundation, Inc.
+
+ This program is free software: you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation, either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program. If not, see <http://www.gnu.org/licenses/>. */
+
+#include <config.h>
+
+#include "language/lexer/command-name.h"
+
+#include <assert.h>
+#include <limits.h>
+
+#include "data/identifier.h"
+
+#include "gl/c-ctype.h"
+
+/* Extracts the first word of S into WORD and advances S past that word.
+   Returns true if a word was extracted.  Returns false, with WORD set to the
+   empty substring, if nothing but white space remained in S.
+
+   A word is a sequence of digits, a letter possibly followed by a sequence of
+   letters or digits, or one character of another type.  Words may be delimited
+   by spaces. */
+static bool
+find_word (struct substring *s, struct substring *word)
+{
+  size_t ofs;
+  ucs4_t c;
+
+  /* Skip whitespace. */
+  for (;;)
+    {
+      c = ss_first_mb (*s);
+      if (c == UINT32_MAX)
+        {
+          /* Nothing left in S. */
+          *word = ss_empty ();
+          return false;
+        }
+      else if (lex_uc_is_space (c))
+        ss_get_mb (s);
+      else
+        break;
+    }
+
+  /* OFS is the length in bytes of the word found so far; it starts out
+     covering just the first character. */
+  ofs = ss_first_mblen (*s);
+  if (lex_uc_is_id1 (c))
+    {
+      while (lex_uc_is_idn (ss_at_mb (*s, ofs)))
+        ofs += ss_at_mblen (*s, ofs);
+    }
+  else if (c_isdigit (c))
+    {
+      /* A substring is delimited by its length, not by a null terminator, so
+         the scan must stop at S's end instead of reading S->string[ofs]
+         unconditionally. */
+      while (ofs < s->length && c_isdigit (s->string[ofs]))
+        ofs++;
+    }
+  ss_get_bytes (s, ofs, word);
+  return true;
+}
+
+/* Counts the words in S, using find_word() to split it.  S is passed by
+   value, so the caller's substring is not advanced. */
+static int
+count_words (struct substring s)
+{
+  struct substring ignored;
+  int count;
+
+  for (count = 0; find_word (&s, &ignored); count++)
+    continue;
+  return count;
+}
+
+/* Checks whether STRING, as typed by the user, is compatible with the full
+   name of a COMMAND.  The procedure is:
+
+   1. Split COMMAND into words C[0]...C[n - 1] and STRING into words
+      S[0]...S[m - 1], both with find_word().
+
+   2. For each i in 0 <= i < min(n, m), compare C[i] against S[i] with the
+      keyword matching rules of lex_id_match().  As soon as one pair fails to
+      match, return false.  In that case *MISSING_WORDS is not written (but
+      *EXACT has been).
+
+   3. Otherwise return true, after storing n - m in *MISSING_WORDS.  *EXACT is
+      true if no S[i] was shorter than its C[i] (that is, nothing was
+      abbreviated), false otherwise. */
+bool
+command_match (struct substring command, struct substring string,
+               bool *exact, int *missing_words)
+{
+  struct substring cw, sw;
+
+  *exact = true;
+  while (find_word (&command, &cw))
+    {
+      int match;
+
+      if (!find_word (&string, &sw))
+        {
+          /* STRING ran out first.  CW has already been taken out of COMMAND,
+             hence the "1 +". */
+          *missing_words = 1 + count_words (command);
+          return true;
+        }
+
+      match = lex_id_match (cw, sw);
+      if (sw.length < cw.length)
+        *exact = false;
+      if (match == 0)
+        return false;
+    }
+
+  /* COMMAND ran out; whatever is left of STRING is surplus. */
+  *missing_words = -count_words (string);
+  return true;
+}
+
+/* Prepares CM to match STRING against a table of command names.  CM keeps a
+   copy of the substring STRING (not of the bytes it refers to).
+
+   STRING may be ASCII or UTF-8.
+
+   For sample use, see command.c.  Here's a usage outline:
+
+      // Try each possible command.
+      command_matcher_init (&cm, string);
+      for (cmd = commands; cmd < &commands[command_cnt]; cmd++)
+        command_matcher_add (&cm, ss_cstr (cmd->name),
+                             CONST_CAST (void *, cmd));
+
+      // Get the result.
+      match = command_matcher_get_match (&cm);
+      missing_words = command_matcher_get_missing_words (&cm);
+      command_matcher_destroy (&cm);
+
+      if (missing_words > 0)
+        {
+          // Incomplete command name.  Add another word to the string
+          // and start over.  Or if there are no more words to be added,
+          // add " ." to the string as a sentinel and start over.
+        }
+      else if (match == NULL)
+        {
+          // No valid command with this name.
+        }
+      else if (missing_words == 0)
+        {
+          // The full, correct command name is 'match'.
+        }
+      else if (missing_words < 0)
+        {
+          // The abs(missing_words) last words of 'string' are actually
+          // part of the command's body, not part of its name; they
+          // were only needed to resolve ambiguities.  'match' is the
+          // correct command but those extra words should be put back
+          // for later re-parsing.
+        }
+*/
+void
+command_matcher_init (struct command_matcher *cm, struct substring string)
+{
+  /* No candidate has been seen yet. */
+  cm->exact_match = NULL;
+  cm->match = NULL;
+  cm->n_matches = 0;
+  cm->match_missing_words = 0;
+  cm->extensible = false;
+
+  cm->string = string;
+}
+
+/* Destroys CM's state.
+
+   command_matcher_init() and command_matcher_add() only store values and
+   caller-supplied pointers in CM, so there is currently nothing to release;
+   the function exists so that callers can pair it with
+   command_matcher_init(). */
+void
+command_matcher_destroy (struct command_matcher *cm UNUSED)
+{
+ /* Nothing to do. */
+}
+
+/* Offers COMMAND to CM as a candidate for the command name being parsed.  If
+   COMMAND turns out to be the right name, command_matcher_get_match() will
+   later return AUX, which must therefore be nonnull.
+
+   COMMAND must be an ASCII string. */
+void
+command_matcher_add (struct command_matcher *cm, struct substring command,
+                     void *aux)
+{
+  bool is_exact;
+  int n_missing;
+
+  assert (aux != NULL);
+  if (!command_match (command, cm->string, &is_exact, &n_missing))
+    return;
+
+  if (n_missing > 0)
+    {
+      /* COMMAND has more words than the string supplied so far. */
+      cm->extensible = true;
+      return;
+    }
+
+  if (is_exact && n_missing == 0)
+    {
+      cm->exact_match = aux;
+      return;
+    }
+
+  /* Abbreviated match, or a match that leaves words of the string unused.
+     A candidate that uses more of the string than the ones recorded so far
+     supersedes them. */
+  if (n_missing > cm->match_missing_words)
+    cm->n_matches = 0;
+
+  if (cm->n_matches == 0 || n_missing >= cm->match_missing_words)
+    {
+      cm->match_missing_words = n_missing;
+      cm->match = aux;
+      cm->n_matches++;
+    }
+}
+
+/* Returns the AUX value of the command matched by CM, or a null pointer if
+   there is none: either some candidate still needs more words, or no candidate
+   matched, or the best non-exact candidates are ambiguous. */
+void *
+command_matcher_get_match (const struct command_matcher *cm)
+{
+  if (cm->extensible)
+    return NULL;
+  if (cm->exact_match != NULL)
+    return cm->exact_match;
+  if (cm->n_matches == 1)
+    return cm->match;
+  return NULL;
+}
+
+/* Returns the number of words in the matched command name minus the number of
+   words in the string given to command_matcher_init().  The result is 1
+   whenever some candidate command has more words than the string, whatever
+   the actual difference is. */
+int
+command_matcher_get_missing_words (const struct command_matcher *cm)
+{
+  if (cm->extensible)
+    return 1;
+  if (cm->exact_match != NULL)
+    return 0;
+  return cm->match_missing_words;
+}
--- /dev/null
+/* PSPP - a program for statistical analysis.
+ Copyright (C) 2010 Free Software Foundation, Inc.
+
+ This program is free software: you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation, either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program. If not, see <http://www.gnu.org/licenses/>. */
+
+#ifndef COMMAND_NAME_H
+#define COMMAND_NAME_H 1
+
+#include <stdbool.h>
+#include "libpspp/str.h"
+
+bool command_match (struct substring command, struct substring string,
+ bool *exact, int *missing_words);
+
+/* Allows matching a string against a table of command names.
+
+   Initialize with command_matcher_init(), offer each candidate name with
+   command_matcher_add(), then read the outcome with
+   command_matcher_get_match() and command_matcher_get_missing_words(). */
+struct command_matcher
+ {
+ /* The string being matched, as passed to command_matcher_init(). */
+ struct substring string;
+ /* True if some candidate matched but has more words than 'string'. */
+ bool extensible;
+ /* AUX of a candidate that matched 'string' exactly, word for word, or
+ null if there has been none. */
+ void *exact_match;
+ /* Number of other matching candidates recorded at the current
+ 'match_missing_words' level. */
+ int n_matches;
+ /* AUX of the most recently recorded such candidate, or null. */
+ void *match;
+ /* missing_words value shared by the recorded candidates (zero or
+ negative; zero before any candidate is recorded). */
+ int match_missing_words;
+ };
+
+void command_matcher_init (struct command_matcher *, struct substring string);
+void command_matcher_destroy (struct command_matcher *);
+
+void command_matcher_add (struct command_matcher *, struct substring command,
+ void *aux);
+
+void *command_matcher_get_match (const struct command_matcher *);
+int command_matcher_get_missing_words (const struct command_matcher *);
+
+#endif /* command-name.h */
check_PROGRAMS += \
tests/data/datasheet-test \
tests/data/inexactify \
+ tests/language/lexer/command-name-test \
tests/libpspp/abt-test \
tests/libpspp/bt-test \
tests/libpspp/heap-test \
tests_dissect_sysfile_LDADD = gl/libgl.la $(LIBINTL)
tests_dissect_sysfile_CPPFLAGS = $(AM_CPPFLAGS) -DINSTALLDIR=\"$(bindir)\"
+check_PROGRAMS += tests/language/lexer/command-name-test
+tests_language_lexer_command_name_test_SOURCES = \
+ src/data/identifier.c \
+ src/language/lexer/command-name.c \
+ tests/language/lexer/command-name-test.c
+tests_language_lexer_command_name_test_LDADD = \
+ src/libpspp/libpspp.la \
+ gl/libgl.la \
+ $(LIBINTL)
+tests_language_lexer_command_name_test_CFLAGS = $(AM_CFLAGS)
+
check_PROGRAMS += tests/output/render-test
tests_output_render_test_SOURCES = tests/output/render-test.c
tests_output_render_test_LDADD = \
tests/language/dictionary/weight.at \
tests/language/expressions/evaluate.at \
tests/language/expressions/parse.at \
+ tests/language/lexer/command-name.at \
tests/language/lexer/lexer.at \
tests/language/lexer/q2c.at \
tests/language/lexer/variable-parser.at \
CHECK_LOCAL += tests_check
tests_check: tests/atconfig tests/atlocal $(TESTSUITE) $(check_PROGRAMS)
- $(SHELL) '$(TESTSUITE)' -C tests AUTOTEST_PATH=tests/data:tests/libpspp:tests/output:src/ui/terminal $(TESTSUITEFLAGS)
+ $(SHELL) '$(TESTSUITE)' -C tests AUTOTEST_PATH=tests/data:tests/language/lexer:tests/libpspp:tests/output:src/ui/terminal $(TESTSUITEFLAGS)
CLEAN_LOCAL += tests_clean
tests_clean:
AUTOM4TE = $(SHELL) $(srcdir)/build-aux/missing --run autom4te
AUTOTEST = $(AUTOM4TE) --language=autotest
$(TESTSUITE): package.m4 $(srcdir)/tests/testsuite.at $(TESTSUITE_AT)
- $(AUTOTEST) -I '$(srcdir)' -o $@.tmp $@.at
+ $(AUTOTEST) -I '$(srcdir)' $@.at | sed 's/@<00A0>@/ /g' > $@.tmp
mv $@.tmp $@
# The `:;' works around a Bash 3.2 bug when the output is not writeable.
EXECUTE.
])
AT_CHECK([pspp -O format=csv command.sps], [1], [dnl
-command.sps:1: error: Unknown command DATA RUBBISH.
+command.sps:1: error: Unknown command `DATA rubbish'.
command.sps:2: error: EXECUTE: EXECUTE is allowed only after the active file has been defined.
])
AT_CHECK([pspp -O format=csv input-program.sps], [1], [dnl
input-program.sps:3: error: BEGIN DATA: BEGIN DATA is not allowed inside INPUT PROGRAM.
-input-program.sps:4: error: Syntax error at `123456789': expecting command name.
+input-program.sps:4: error: Unknown command `123456789'.
-input-program.sps:5: error: Unknown command END DATA.
+input-program.sps:5: error: Unknown command `END DATA'.
])
AT_CLEANUP
--- /dev/null
+/* PSPP - a program for statistical analysis.
+ Copyright (C) 2010 Free Software Foundation, Inc.
+
+ This program is free software: you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation, either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program. If not, see <http://www.gnu.org/licenses/>. */
+
+#include <config.h>
+
+#include <ctype.h>
+#include <errno.h>
+#include <getopt.h>
+#include <limits.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+
+#include "libpspp/assertion.h"
+#include "libpspp/compiler.h"
+#include "language/lexer/command-name.h"
+
+#include "gl/error.h"
+#include "gl/progname.h"
+
+static char **commands, **strings;
+static size_t n_commands, n_strings;
+
+static void parse_options (int argc, char **argv);
+static void usage (void) NO_RETURN;
+
+/* Test driver.  For each STRING given on the command line, prints how it
+   compares against each COMMAND with command_match(), then prints the overall
+   result obtained by feeding all the COMMANDs to a command_matcher. */
+int
+main (int argc, char *argv[])
+{
+ size_t i;
+
+ set_program_name (argv[0]);
+ parse_options (argc, argv);
+
+ for (i = 0; i < n_strings; i++)
+ {
+ const char *string = strings[i];
+ struct command_matcher cm;
+ const char *best;
+ size_t j;
+
+ /* A blank line separates the reports for consecutive strings. */
+ if (i > 0)
+ putchar ('\n');
+ printf ("string=\"%s\":\n", string);
+ /* First, the pairwise result for every command. */
+ for (j = 0; j < n_commands; j++)
+ {
+ const char *command = commands[j];
+ int missing_words;
+ bool match, exact;
+
+ match = command_match (ss_cstr (command), ss_cstr (string),
+ &exact, &missing_words);
+ printf ("\tcommand=\"%s\" match=%s",
+ command, match ? "yes" : "no");
+ /* 'exact' and 'missing_words' are only reported for a match. */
+ if (match)
+ printf (" exact=%s missing_words=%d",
+ exact ? "yes" : "no", missing_words);
+ putchar ('\n');
+ }
+
+ /* Then, the best match across all commands.  Each command's own name
+ serves as its AUX value, so the match can be printed directly. */
+ command_matcher_init (&cm, ss_cstr (string));
+ for (j = 0; j < n_commands; j++)
+ command_matcher_add (&cm, ss_cstr (commands[j]), commands[j]);
+ best = command_matcher_get_match (&cm);
+ printf ("match: %s, missing_words=%d\n",
+ best ? best : "none", command_matcher_get_missing_words (&cm));
+ command_matcher_destroy (&cm);
+ }
+
+ return 0;
+}
+
+/* Parses the command line in ARGV (ARGC elements).  After any options, the
+   arguments must have the form COMMAND... , STRING... with a lone "," argument
+   as the separator.  Points 'commands' and 'strings' into ARGV and sets
+   'n_commands' and 'n_strings' accordingly.  Exits with an error message if
+   the separator is missing or either list is empty. */
+static void
+parse_options (int argc, char **argv)
+{
+  static const struct option long_options[] =
+    {
+      {"help", no_argument, NULL, 'h'},
+      {NULL, 0, NULL, 0},
+    };
+  int comma;
+  int opt;
+
+  while ((opt = getopt_long (argc, argv, "h", long_options, NULL)) != -1)
+    switch (opt)
+      {
+      case 'h':
+        /* usage() is declared NO_RETURN. */
+        usage ();
+
+      case 0:
+        break;
+
+      case '?':
+        exit (EXIT_FAILURE);
+        break;
+
+      default:
+        NOT_REACHED ();
+      }
+
+  /* Locate the "," that separates the command names from the strings. */
+  comma = optind;
+  while (comma < argc && strcmp (argv[comma], ","))
+    comma++;
+  if (comma >= argc)
+    error (1, 0, "missing ',' on command line; use --help for help");
+
+  commands = &argv[optind];
+  n_commands = comma - optind;
+
+  strings = &argv[comma + 1];
+  n_strings = argc - (comma + 1);
+
+  if (n_commands == 0 || n_strings == 0)
+    error (1, 0, "must specify at least one command and one string; "
+           "use --help for help");
+}
+
+/* Prints a help message, which mentions the program name twice, on stdout,
+   then exits with EXIT_SUCCESS.  Does not return. */
+static void
+usage (void)
+{
+ printf ("\
+%s, to match PSPP command names\n\
+usage: %s [OPTIONS] COMMAND... , STRING...\n\
+\n\
+Options:\n\
+ -h, --help print this help message\n",
+ program_name, program_name);
+ exit (EXIT_SUCCESS);
+}
--- /dev/null
+AT_BANNER([command name matching])
+\f
+AT_SETUP([single words])
+AT_KEYWORDS([command name matching])
+AT_CHECK([command-name-test DESCRIPTIVES , DESCRIPTIVESX DESCRIPTIVES descr Des DEX DE '' 'DESCRIPTIVES MORE' 'DESCRIPTIVES@<00A0>@MORE'],
+ [0], [dnl
+string="DESCRIPTIVESX":
+ command="DESCRIPTIVES" match=no
+match: none, missing_words=0
+
+string="DESCRIPTIVES":
+ command="DESCRIPTIVES" match=yes exact=yes missing_words=0
+match: DESCRIPTIVES, missing_words=0
+
+string="descr":
+ command="DESCRIPTIVES" match=yes exact=no missing_words=0
+match: DESCRIPTIVES, missing_words=0
+
+string="Des":
+ command="DESCRIPTIVES" match=yes exact=no missing_words=0
+match: DESCRIPTIVES, missing_words=0
+
+string="DEX":
+ command="DESCRIPTIVES" match=no
+match: none, missing_words=0
+
+string="DE":
+ command="DESCRIPTIVES" match=no
+match: none, missing_words=0
+
+string="":
+ command="DESCRIPTIVES" match=yes exact=yes missing_words=1
+match: none, missing_words=1
+
+string="DESCRIPTIVES MORE":
+ command="DESCRIPTIVES" match=yes exact=yes missing_words=-1
+match: DESCRIPTIVES, missing_words=-1
+
+string="DESCRIPTIVES@<00A0>@MORE":
+ command="DESCRIPTIVES" match=yes exact=yes missing_words=-1
+match: DESCRIPTIVES, missing_words=-1
+])
+AT_CLEANUP
+\f
+AT_SETUP([two words without prefix match])
+AT_KEYWORDS([command name matching])
+AT_CHECK([command-name-test 'DO IF' 'DO REPEAT' , 'DO@<00A0>@IF' 'DO REPEAT' 'DO REP' 'DO OTHER' 'D IF' 'DO I' DO],
+ [0], [dnl
+string="DO@<00A0>@IF":
+ command="DO IF" match=yes exact=yes missing_words=0
+ command="DO REPEAT" match=no
+match: DO IF, missing_words=0
+
+string="DO REPEAT":
+ command="DO IF" match=no
+ command="DO REPEAT" match=yes exact=yes missing_words=0
+match: DO REPEAT, missing_words=0
+
+string="DO REP":
+ command="DO IF" match=no
+ command="DO REPEAT" match=yes exact=no missing_words=0
+match: DO REPEAT, missing_words=0
+
+string="DO OTHER":
+ command="DO IF" match=no
+ command="DO REPEAT" match=no
+match: none, missing_words=0
+
+string="D IF":
+ command="DO IF" match=no
+ command="DO REPEAT" match=no
+match: none, missing_words=0
+
+string="DO I":
+ command="DO IF" match=no
+ command="DO REPEAT" match=no
+match: none, missing_words=0
+
+string="DO":
+ command="DO IF" match=yes exact=yes missing_words=1
+ command="DO REPEAT" match=yes exact=yes missing_words=1
+match: none, missing_words=1
+])
+AT_CLEANUP
+\f
+AT_SETUP([two words with prefix match])
+AT_KEYWORDS([command name matching])
+AT_CHECK([command-name-test GET 'GET DATA' , GET 'GET TYPE' 'GET DAT' 'GET DATA'],
+ [0], [dnl
+string="GET":
+ command="GET" match=yes exact=yes missing_words=0
+ command="GET DATA" match=yes exact=yes missing_words=1
+match: none, missing_words=1
+
+string="GET TYPE":
+ command="GET" match=yes exact=yes missing_words=-1
+ command="GET DATA" match=no
+match: GET, missing_words=-1
+
+string="GET DAT":
+ command="GET" match=yes exact=yes missing_words=-1
+ command="GET DATA" match=yes exact=no missing_words=0
+match: GET DATA, missing_words=0
+
+string="GET DATA":
+ command="GET" match=yes exact=yes missing_words=-1
+ command="GET DATA" match=yes exact=yes missing_words=0
+match: GET DATA, missing_words=0
+])
+AT_CLEANUP
+\f
+AT_SETUP([ambiguous single-word names])
+AT_KEYWORDS([command name matching])
+AT_CHECK([command-name-test CASEPLOT CASESTOVARS , CAS Case CaseP CaseS], [0],
+ [dnl
+string="CAS":
+ command="CASEPLOT" match=yes exact=no missing_words=0
+ command="CASESTOVARS" match=yes exact=no missing_words=0
+match: none, missing_words=0
+
+string="Case":
+ command="CASEPLOT" match=yes exact=no missing_words=0
+ command="CASESTOVARS" match=yes exact=no missing_words=0
+match: none, missing_words=0
+
+string="CaseP":
+ command="CASEPLOT" match=yes exact=no missing_words=0
+ command="CASESTOVARS" match=no
+match: CASEPLOT, missing_words=0
+
+string="CaseS":
+ command="CASEPLOT" match=no
+ command="CASESTOVARS" match=yes exact=no missing_words=0
+match: CASESTOVARS, missing_words=0
+])
+AT_CLEANUP
+
+AT_SETUP([ambiguous two-word names])
+AT_KEYWORDS([command name matching])
+AT_CHECK([command-name-test VARCOMP VARSTOCASES 'VARIABLE ATTRIBUTE' , VAR VARC VARS VARI 'VAR@<00A0>@ATT'],
+ [0], [dnl
+string="VAR":
+ command="VARCOMP" match=yes exact=no missing_words=0
+ command="VARSTOCASES" match=yes exact=no missing_words=0
+ command="VARIABLE ATTRIBUTE" match=yes exact=no missing_words=1
+match: none, missing_words=1
+
+string="VARC":
+ command="VARCOMP" match=yes exact=no missing_words=0
+ command="VARSTOCASES" match=no
+ command="VARIABLE ATTRIBUTE" match=no
+match: VARCOMP, missing_words=0
+
+string="VARS":
+ command="VARCOMP" match=no
+ command="VARSTOCASES" match=yes exact=no missing_words=0
+ command="VARIABLE ATTRIBUTE" match=no
+match: VARSTOCASES, missing_words=0
+
+string="VARI":
+ command="VARCOMP" match=no
+ command="VARSTOCASES" match=no
+ command="VARIABLE ATTRIBUTE" match=yes exact=no missing_words=1
+match: none, missing_words=1
+
+string="VAR@<00A0>@ATT":
+ command="VARCOMP" match=yes exact=no missing_words=-1
+ command="VARSTOCASES" match=yes exact=no missing_words=-1
+ command="VARIABLE ATTRIBUTE" match=yes exact=no missing_words=0
+match: VARIABLE ATTRIBUTE, missing_words=0
+])
+AT_CLEANUP
+\f
+AT_SETUP([numbers and punctuation])
+AT_KEYWORDS([command name matching])
+AT_CHECK([command-name-test T-TEST 2SLS LIST , T-TEST 'T - Test' 2SLS '2 SLS' List],
+ [0], [dnl
+string="T-TEST":
+ command="T-TEST" match=yes exact=yes missing_words=0
+ command="2SLS" match=no
+ command="LIST" match=no
+match: T-TEST, missing_words=0
+
+string="T - Test":
+ command="T-TEST" match=yes exact=yes missing_words=0
+ command="2SLS" match=no
+ command="LIST" match=no
+match: T-TEST, missing_words=0
+
+string="2SLS":
+ command="T-TEST" match=no
+ command="2SLS" match=yes exact=yes missing_words=0
+ command="LIST" match=no
+match: 2SLS, missing_words=0
+
+string="2 SLS":
+ command="T-TEST" match=no
+ command="2SLS" match=yes exact=yes missing_words=0
+ command="LIST" match=no
+match: 2SLS, missing_words=0
+
+string="List":
+ command="T-TEST" match=no
+ command="2SLS" match=no
+ command="LIST" match=yes exact=yes missing_words=0
+match: LIST, missing_words=0
+])
+AT_CLEANUP
+\f
+AT_SETUP([off by more than one word])
+AT_KEYWORDS([command name matching])
+AT_CHECK([command-name-test 'a@<00A0>@b c' , a 'a b' 'a b c' 'a@<00A0>@b c d' 'a b c@<00A0>@d e'],
+ [0], [dnl
+string="a":
+ command="a@<00A0>@b c" match=yes exact=yes missing_words=2
+match: none, missing_words=1
+
+string="a b":
+ command="a@<00A0>@b c" match=yes exact=yes missing_words=1
+match: none, missing_words=1
+
+string="a b c":
+ command="a@<00A0>@b c" match=yes exact=yes missing_words=0
+match: a@<00A0>@b c, missing_words=0
+
+string="a@<00A0>@b c d":
+ command="a@<00A0>@b c" match=yes exact=yes missing_words=-1
+match: a@<00A0>@b c, missing_words=-1
+
+string="a b c@<00A0>@d e":
+ command="a@<00A0>@b c" match=yes exact=yes missing_words=-2
+match: a@<00A0>@b c, missing_words=-2
+])
+AT_CLEANUP