From 8381768f3394a907c621cb9acbb77b83f5cd4875 Mon Sep 17 00:00:00 2001 From: Ben Pfaff Date: Sun, 3 Dec 2006 22:16:45 +0000 Subject: [PATCH] Fix lack of ->name and ->location in DO REPEAT's getl_interface. See bug #15702. Significant cleanup to DO REPEAT. --- po/de.po | 98 +++--- po/pspp.pot | 98 +++--- src/data/ChangeLog | 23 ++ src/data/identifier.c | 144 +++++---- src/data/identifier.h | 26 +- src/data/variable.c | 2 +- src/language/ChangeLog | 4 + src/language/control/ChangeLog | 51 +++ src/language/control/repeat.c | 412 +++++++++++++----------- src/language/data-io/data-reader.c | 2 +- src/language/data-io/get.c | 3 +- src/language/dictionary/sys-file-info.c | 3 +- src/language/lexer/ChangeLog | 27 ++ src/language/lexer/lexer.c | 131 ++++---- src/language/lexer/lexer.h | 15 +- src/language/syntax-file.c | 4 +- src/language/syntax-file.h | 6 +- src/language/utilities/set.q | 2 +- src/libpspp/ChangeLog | 25 ++ src/libpspp/getl.c | 13 +- src/libpspp/getl.h | 32 +- src/libpspp/str.c | 19 ++ src/libpspp/str.h | 7 +- src/ui/terminal/ChangeLog | 5 + src/ui/terminal/read-line.c | 4 +- 25 files changed, 702 insertions(+), 454 deletions(-) diff --git a/po/de.po b/po/de.po index bd5717df..c7ec86d5 100644 --- a/po/de.po +++ b/po/de.po @@ -10,7 +10,7 @@ msgid "" msgstr "" "Project-Id-Version: PSPP 0.4.2\n" "Report-Msgid-Bugs-To: pspp-dev@gnu.org\n" -"POT-Creation-Date: 2006-11-29 18:57+0800\n" +"POT-Creation-Date: 2006-11-30 22:52-0800\n" "PO-Revision-Date: 2006-05-26 17:49+0800\n" "Last-Translator: John Darrington \n" "Language-Team: German \n" @@ -306,13 +306,13 @@ msgstr "" #: src/data/format.c:310 src/data/por-file-reader.c:487 #: src/data/sys-file-reader.c:1222 src/data/sys-file-reader.c:1235 -#: src/ui/gui/psppire.glade:1192 src/ui/gui/psppire-var-store.c:451 +#: src/ui/gui/psppire-var-store.c:451 src/ui/gui/psppire.glade:1192 msgid "String" msgstr "Zeichenkette" #: src/data/format.c:310 src/data/por-file-reader.c:487 #: src/data/sys-file-reader.c:1222 
src/data/sys-file-reader.c:1235 -#: src/ui/gui/psppire.glade:1053 src/ui/gui/psppire-var-store.c:444 +#: src/ui/gui/psppire-var-store.c:444 src/ui/gui/psppire.glade:1053 msgid "Numeric" msgstr "Nummer" @@ -1335,7 +1335,7 @@ msgstr "" msgid "COLUMN subcommand multiply specified." msgstr "" -#: src/language/data-io/inpt-pgm.c:385 +#: src/language/data-io/inpt-pgm.c:386 msgid "" "REREAD: Column numbers must be positive finite numbers. Column set to 1." msgstr "" @@ -1597,6 +1597,15 @@ msgid "" "Data fields must be listed in order of increasing record number." msgstr "" +#: src/language/data-io/print-space.c:116 +msgid "The expression on PRINT SPACE evaluated to the system-missing value." +msgstr "" + +#: src/language/data-io/print-space.c:119 +#, c-format +msgid "The expression on PRINT SPACE evaluated to %g." +msgstr "" + #: src/language/data-io/print.c:261 #, c-format msgid "Output calls for %d records but %d specified on RECORDS subcommand." @@ -1616,15 +1625,6 @@ msgid_plural "Writing %d records." msgstr[0] "" msgstr[1] "" -#: src/language/data-io/print-space.c:116 -msgid "The expression on PRINT SPACE evaluated to the system-missing value." -msgstr "" - -#: src/language/data-io/print-space.c:119 -#, c-format -msgid "The expression on PRINT SPACE evaluated to %g." -msgstr "" - #: src/language/dictionary/apply-dictionary.c:73 #, c-format msgid "Variable %s is %s in target file, but %s in source file." @@ -1958,7 +1958,7 @@ msgstr "" msgid "Value label `%g' is not integer." msgstr "" -#: src/language/dictionary/value-labels.c:185 +#: src/language/dictionary/value-labels.c:186 msgid "Truncating value label to 60 characters." msgstr "" @@ -3666,20 +3666,20 @@ msgstr "" msgid "Document entered %s by %s:" msgstr "" -#: src/language/xforms/compute.c:148 src/language/xforms/compute.c:195 +#: src/language/xforms/compute.c:147 src/language/xforms/compute.c:194 #, c-format msgid "" "When executing COMPUTE: SYSMIS is not a valid value as an index into vector %" "s." 
msgstr "" -#: src/language/xforms/compute.c:151 src/language/xforms/compute.c:202 +#: src/language/xforms/compute.c:150 src/language/xforms/compute.c:201 #, c-format msgid "" "When executing COMPUTE: %g is not a valid value as an index into vector %s." msgstr "" -#: src/language/xforms/compute.c:340 +#: src/language/xforms/compute.c:343 #, c-format msgid "There is no vector named %s." msgstr "" @@ -4233,10 +4233,6 @@ msgstr "Unpassend Wert für Variable" msgid "Incorrect range specification" msgstr "Falshe Spannweitebeschreibung" -#: src/ui/gui/psppire.c:71 -msgid "Sorry. The help system hasn't yet been implemented." -msgstr "Es gibt noch nicht kein Helpsysteme. Schade!" - #: src/ui/gui/psppire-data-store.c:724 msgid "var" msgstr "" @@ -4248,6 +4244,38 @@ msgstr "" msgid "%d" msgstr "" +#: src/ui/gui/psppire-var-store.c:440 +msgid "None" +msgstr "Keine" + +#: src/ui/gui/psppire-var-store.c:445 src/ui/gui/psppire.glade:1072 +msgid "Comma" +msgstr "Komma" + +#: src/ui/gui/psppire-var-store.c:446 src/ui/gui/psppire.glade:1092 +msgid "Dot" +msgstr "Punkt" + +#: src/ui/gui/psppire-var-store.c:447 +msgid "Scientific" +msgstr "Wissenschäflich" + +#: src/ui/gui/psppire-var-store.c:448 src/ui/gui/psppire.glade:1132 +msgid "Date" +msgstr "Datum" + +#: src/ui/gui/psppire-var-store.c:449 src/ui/gui/psppire.glade:1152 +msgid "Dollar" +msgstr "Euro" + +#: src/ui/gui/psppire-var-store.c:450 +msgid "Custom" +msgstr "Spezial" + +#: src/ui/gui/psppire.c:71 +msgid "Sorry. The help system hasn't yet been implemented." +msgstr "Es gibt noch nicht kein Helpsysteme. Schade!" 
+ #: src/ui/gui/psppire.glade:39 msgid "_File" msgstr "_Datei" @@ -4428,26 +4456,10 @@ msgstr "" msgid "Variable Type" msgstr "Variableansicht" -#: src/ui/gui/psppire.glade:1072 src/ui/gui/psppire-var-store.c:445 -msgid "Comma" -msgstr "Komma" - -#: src/ui/gui/psppire.glade:1092 src/ui/gui/psppire-var-store.c:446 -msgid "Dot" -msgstr "Punkt" - #: src/ui/gui/psppire.glade:1112 msgid "Scientific notation" msgstr "Wissenschaftlichnotation" -#: src/ui/gui/psppire.glade:1132 src/ui/gui/psppire-var-store.c:448 -msgid "Date" -msgstr "Datum" - -#: src/ui/gui/psppire.glade:1152 src/ui/gui/psppire-var-store.c:449 -msgid "Dollar" -msgstr "Euro" - #: src/ui/gui/psppire.glade:1172 msgid "Custom currency" msgstr "Spezialwährung" @@ -4528,18 +4540,6 @@ msgstr "" msgid "Sort by:" msgstr "" -#: src/ui/gui/psppire-var-store.c:440 -msgid "None" -msgstr "Keine" - -#: src/ui/gui/psppire-var-store.c:447 -msgid "Scientific" -msgstr "Wissenschäflich" - -#: src/ui/gui/psppire-var-store.c:450 -msgid "Custom" -msgstr "Spezial" - #: src/ui/gui/sort-cases-dialog.c:342 msgid "Var" msgstr "" diff --git a/po/pspp.pot b/po/pspp.pot index fe72d407..9da0746c 100644 --- a/po/pspp.pot +++ b/po/pspp.pot @@ -8,7 +8,7 @@ msgid "" msgstr "" "Project-Id-Version: PACKAGE VERSION\n" "Report-Msgid-Bugs-To: pspp-dev@gnu.org\n" -"POT-Creation-Date: 2006-11-29 18:57+0800\n" +"POT-Creation-Date: 2006-11-30 22:52-0800\n" "PO-Revision-Date: YEAR-MO-DA HO:MI+ZONE\n" "Last-Translator: FULL NAME \n" "Language-Team: LANGUAGE \n" @@ -305,13 +305,13 @@ msgstr "" #: src/data/format.c:310 src/data/por-file-reader.c:487 #: src/data/sys-file-reader.c:1222 src/data/sys-file-reader.c:1235 -#: src/ui/gui/psppire.glade:1192 src/ui/gui/psppire-var-store.c:451 +#: src/ui/gui/psppire-var-store.c:451 src/ui/gui/psppire.glade:1192 msgid "String" msgstr "" #: src/data/format.c:310 src/data/por-file-reader.c:487 #: src/data/sys-file-reader.c:1222 src/data/sys-file-reader.c:1235 -#: src/ui/gui/psppire.glade:1053 
src/ui/gui/psppire-var-store.c:444 +#: src/ui/gui/psppire-var-store.c:444 src/ui/gui/psppire.glade:1053 msgid "Numeric" msgstr "" @@ -1334,7 +1334,7 @@ msgstr "" msgid "COLUMN subcommand multiply specified." msgstr "" -#: src/language/data-io/inpt-pgm.c:385 +#: src/language/data-io/inpt-pgm.c:386 msgid "" "REREAD: Column numbers must be positive finite numbers. Column set to 1." msgstr "" @@ -1596,6 +1596,15 @@ msgid "" "Data fields must be listed in order of increasing record number." msgstr "" +#: src/language/data-io/print-space.c:116 +msgid "The expression on PRINT SPACE evaluated to the system-missing value." +msgstr "" + +#: src/language/data-io/print-space.c:119 +#, c-format +msgid "The expression on PRINT SPACE evaluated to %g." +msgstr "" + #: src/language/data-io/print.c:261 #, c-format msgid "Output calls for %d records but %d specified on RECORDS subcommand." @@ -1615,15 +1624,6 @@ msgid_plural "Writing %d records." msgstr[0] "" msgstr[1] "" -#: src/language/data-io/print-space.c:116 -msgid "The expression on PRINT SPACE evaluated to the system-missing value." -msgstr "" - -#: src/language/data-io/print-space.c:119 -#, c-format -msgid "The expression on PRINT SPACE evaluated to %g." -msgstr "" - #: src/language/dictionary/apply-dictionary.c:73 #, c-format msgid "Variable %s is %s in target file, but %s in source file." @@ -1957,7 +1957,7 @@ msgstr "" msgid "Value label `%g' is not integer." msgstr "" -#: src/language/dictionary/value-labels.c:185 +#: src/language/dictionary/value-labels.c:186 msgid "Truncating value label to 60 characters." msgstr "" @@ -3665,20 +3665,20 @@ msgstr "" msgid "Document entered %s by %s:" msgstr "" -#: src/language/xforms/compute.c:148 src/language/xforms/compute.c:195 +#: src/language/xforms/compute.c:147 src/language/xforms/compute.c:194 #, c-format msgid "" "When executing COMPUTE: SYSMIS is not a valid value as an index into vector %" "s." 
msgstr "" -#: src/language/xforms/compute.c:151 src/language/xforms/compute.c:202 +#: src/language/xforms/compute.c:150 src/language/xforms/compute.c:201 #, c-format msgid "" "When executing COMPUTE: %g is not a valid value as an index into vector %s." msgstr "" -#: src/language/xforms/compute.c:340 +#: src/language/xforms/compute.c:343 #, c-format msgid "There is no vector named %s." msgstr "" @@ -4232,10 +4232,6 @@ msgstr "" msgid "Incorrect range specification" msgstr "" -#: src/ui/gui/psppire.c:71 -msgid "Sorry. The help system hasn't yet been implemented." -msgstr "" - #: src/ui/gui/psppire-data-store.c:724 msgid "var" msgstr "" @@ -4247,6 +4243,38 @@ msgstr "" msgid "%d" msgstr "" +#: src/ui/gui/psppire-var-store.c:440 +msgid "None" +msgstr "" + +#: src/ui/gui/psppire-var-store.c:445 src/ui/gui/psppire.glade:1072 +msgid "Comma" +msgstr "" + +#: src/ui/gui/psppire-var-store.c:446 src/ui/gui/psppire.glade:1092 +msgid "Dot" +msgstr "" + +#: src/ui/gui/psppire-var-store.c:447 +msgid "Scientific" +msgstr "" + +#: src/ui/gui/psppire-var-store.c:448 src/ui/gui/psppire.glade:1132 +msgid "Date" +msgstr "" + +#: src/ui/gui/psppire-var-store.c:449 src/ui/gui/psppire.glade:1152 +msgid "Dollar" +msgstr "" + +#: src/ui/gui/psppire-var-store.c:450 +msgid "Custom" +msgstr "" + +#: src/ui/gui/psppire.c:71 +msgid "Sorry. The help system hasn't yet been implemented." 
+msgstr "" + #: src/ui/gui/psppire.glade:39 msgid "_File" msgstr "" @@ -4419,26 +4447,10 @@ msgstr "" msgid "Variable Type" msgstr "" -#: src/ui/gui/psppire.glade:1072 src/ui/gui/psppire-var-store.c:445 -msgid "Comma" -msgstr "" - -#: src/ui/gui/psppire.glade:1092 src/ui/gui/psppire-var-store.c:446 -msgid "Dot" -msgstr "" - #: src/ui/gui/psppire.glade:1112 msgid "Scientific notation" msgstr "" -#: src/ui/gui/psppire.glade:1132 src/ui/gui/psppire-var-store.c:448 -msgid "Date" -msgstr "" - -#: src/ui/gui/psppire.glade:1152 src/ui/gui/psppire-var-store.c:449 -msgid "Dollar" -msgstr "" - #: src/ui/gui/psppire.glade:1172 msgid "Custom currency" msgstr "" @@ -4519,18 +4531,6 @@ msgstr "" msgid "Sort by:" msgstr "" -#: src/ui/gui/psppire-var-store.c:440 -msgid "None" -msgstr "" - -#: src/ui/gui/psppire-var-store.c:447 -msgid "Scientific" -msgstr "" - -#: src/ui/gui/psppire-var-store.c:450 -msgid "Custom" -msgstr "" - #: src/ui/gui/sort-cases-dialog.c:342 msgid "Var" msgstr "" diff --git a/src/data/ChangeLog b/src/data/ChangeLog index 9bd3d429..70873e43 100644 --- a/src/data/ChangeLog +++ b/src/data/ChangeLog @@ -1,3 +1,26 @@ +Sat Dec 2 16:28:32 2006 Ben Pfaff + + Clean up identifier code: don't require identifier enumerations to + be in a particular order; make better use of string library; + expose less of the internals. + + * identifier.c: (lex_skip_identifier) Rename lex_id_get_length, + change interface. Updated all callers. + (lex_id_match) Change interface to use struct substring, update + all callers. + (lex_id_match_len) Removed. Update callers to use lex_id_match. + (global array keywords[]) Make static, change form. Update all + users to use lex_id_name instead. + (lex_is_keyword) New function. + (lex_id_to_token) Change interface to use struct substring, update + all callers. + (lex_id_name) New function. + + * identifier.h: (T_FIRST_KEYWORD) Removed. Changed users to call + lex_is_keyword instead. + (T_LAST_KEYWORD) Removed. + (T_N_KEYWORDS) Removed. 
+ Sat Nov 18 20:46:35 2006 Ben Pfaff * format.c: (fmt_date_template) Distinguish characters for which a diff --git a/src/data/identifier.c b/src/data/identifier.c index c6718a5e..daab9752 100644 --- a/src/data/identifier.c +++ b/src/data/identifier.c @@ -28,16 +28,7 @@ #include #include - - -/* Table of keywords. */ -const char *const keywords[T_N_KEYWORDS + 1] = - { - "AND", "OR", "NOT", - "EQ", "GE", "GT", "LE", "LT", "NE", - "ALL", "BY", "TO", "WITH", - NULL, - }; +#include /* Recognizing identifiers. */ @@ -60,72 +51,107 @@ lex_is_idn (char c_) return lex_is_id1 (c) || isdigit (c) || c == '.' || c == '_'; } -/* If string S begins with an identifier, returns the first - character following it. Otherwise, returns S unchanged. */ -char * -lex_skip_identifier (const char *s) +/* Returns the length of the longest prefix of STRING that forms + a valid identifier. Returns zero if STRING does not begin + with a valid identifier. */ +size_t +lex_id_get_length (struct substring string) { - if (lex_is_id1 (*s)) + size_t length = 0; + if (!ss_is_empty (string) && lex_is_id1 (ss_first (string))) { - s++; - while (lex_is_idn (*s)) - s++; + length = 1; + while (length < ss_length (string) + && lex_is_idn (ss_at (string, length))) + length++; } - return (char *) s; + return length; } /* Comparing identifiers. */ -/* Keywords match if one of the following is true: KW and TOK are - identical (except for differences in case), or TOK is at least 3 - characters long and those characters are identical to KW. KW_LEN - is the length of KW, TOK_LEN is the length of TOK. */ +/* Returns true if TOKEN is a case-insensitive match for KEYWORD. + + Keywords match if one of the following is true: KEYWORD and + TOKEN are identical, or TOKEN is at least 3 characters long + and those characters are identical to KEYWORD. 
*/ bool -lex_id_match_len (const char *kw, size_t kw_len, - const char *tok, size_t tok_len) +lex_id_match (struct substring keyword, struct substring token) { - size_t i = 0; - - assert (kw && tok); - for (;;) - { - if (i == kw_len && i == tok_len) - return true; - else if (i == tok_len) - return i >= 3; - else if (i == kw_len) - return false; - else if (toupper ((unsigned char) kw[i]) - != toupper ((unsigned char) tok[i])) - return false; - - i++; - } + size_t token_len = ss_length (token); + size_t keyword_len = ss_length (keyword); + + if (token_len >= 3 && token_len < keyword_len) + return ss_equals_case (ss_head (keyword, token_len), token); + else + return ss_equals_case (keyword, token); } + +/* Table of keywords. */ +struct keyword + { + int token; + const struct substring identifier; + }; -/* Same as lex_id_match_len() minus the need to pass in the lengths. */ +static const struct keyword keywords[] = + { + { T_AND, SS_LITERAL_INITIALIZER ("AND") }, + { T_OR, SS_LITERAL_INITIALIZER ("OR") }, + { T_NOT, SS_LITERAL_INITIALIZER ("NOT") }, + { T_EQ, SS_LITERAL_INITIALIZER ("EQ") }, + { T_GE, SS_LITERAL_INITIALIZER ("GE") }, + { T_GT, SS_LITERAL_INITIALIZER ("GT") }, + { T_LE, SS_LITERAL_INITIALIZER ("LE") }, + { T_LT, SS_LITERAL_INITIALIZER ("LT") }, + { T_NE, SS_LITERAL_INITIALIZER ("NE") }, + { T_ALL, SS_LITERAL_INITIALIZER ("ALL") }, + { T_BY, SS_LITERAL_INITIALIZER ("BY") }, + { T_TO, SS_LITERAL_INITIALIZER ("TO") }, + { T_WITH, SS_LITERAL_INITIALIZER ("WITH") }, + }; +static const size_t keyword_cnt = sizeof keywords / sizeof *keywords; + +/* Returns true if TOKEN is representable as a keyword. 
*/ bool -lex_id_match (const char *kw, const char *tok) +lex_is_keyword (int token) { - return lex_id_match_len (kw, strlen (kw), tok, strlen (tok)); + const struct keyword *kw; + for (kw = keywords; kw < &keywords[keyword_cnt]; kw++) + if (kw->token == token) + return true; + return false; } - - -/* Returns the proper token type, either T_ID or a reserved keyword - enum, for ID[], which must contain LEN characters. */ +/* Returns the proper token type, either T_ID or a reserved + keyword enum, for ID. */ int -lex_id_to_token (const char *id, size_t len) +lex_id_to_token (struct substring id) { - const char *const *kwp; - - if (len < 2 || len > 4) - return T_ID; + if (ss_length (id) >= 2 && ss_length (id) <= 4) + { + const struct keyword *kw; + for (kw = keywords; kw < &keywords[keyword_cnt]; kw++) + if (ss_equals_case (kw->identifier, id)) + return kw->token; + } - for (kwp = keywords; *kwp; kwp++) - if (!strcasecmp (*kwp, id)) - return T_FIRST_KEYWORD + (kwp - keywords); - return T_ID; } - + +/* Returns the name for the given keyword token type. */ +const char * +lex_id_name (int token) +{ + const struct keyword *kw; + + for (kw = keywords; kw < &keywords[keyword_cnt]; kw++) + if (kw->token == token) + { + /* A "struct substring" is not guaranteed to be + null-terminated, as our caller expects, but in this + case it always will be. */ + return ss_data (kw->identifier); + } + NOT_REACHED (); +} diff --git a/src/data/identifier.h b/src/data/identifier.h index ff4149c0..eece68f5 100644 --- a/src/data/identifier.h +++ b/src/data/identifier.h @@ -17,15 +17,15 @@ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. */ -#if !lex_def_h -#define lex_def_h 1 +#ifndef DATA_IDENTIFIER_H +#define DATA_IDENTIFIER_H 1 #include #include #include +#include /* Token types. */ -/* The order of the enumerals below is important. Do not change it. */ enum { T_ID = 256, /* Identifier. 
*/ @@ -51,23 +51,21 @@ enum T_WITH, /* WITH */ T_EXP, /* ** */ - - T_FIRST_KEYWORD = T_AND, - T_LAST_KEYWORD = T_WITH, - T_N_KEYWORDS = T_LAST_KEYWORD - T_FIRST_KEYWORD + 1 }; +/* Tokens. */ +bool lex_is_keyword (int token); + /* Recognizing identifiers. */ bool lex_is_id1 (char); bool lex_is_idn (char); -char *lex_skip_identifier (const char *); +size_t lex_id_get_length (struct substring); /* Comparing identifiers. */ -bool lex_id_match_len (const char *keyword_string, size_t keyword_len, - const char *token_string, size_t token_len); -bool lex_id_match (const char *keyword_string, const char *token_string); -int lex_id_to_token (const char *id, size_t len); +bool lex_id_match (struct substring keyword, struct substring token); +int lex_id_to_token (struct substring); -extern const char *const keywords[T_N_KEYWORDS + 1] ; +/* Identifier names. */ +const char *lex_id_name (int); -#endif /* !lex_def_h */ +#endif /* !data/identifier.h */ diff --git a/src/data/variable.c b/src/data/variable.c index ca302415..60dd66eb 100644 --- a/src/data/variable.c +++ b/src/data/variable.c @@ -204,7 +204,7 @@ var_is_plausible_name (const char *name, bool issue_error) return false; } - if (lex_id_to_token (name, strlen (name)) != T_ID) + if (lex_id_to_token (ss_cstr (name)) != T_ID) { if (issue_error) msg (SE, _("`%s' may not be used as a variable name because it " diff --git a/src/language/ChangeLog b/src/language/ChangeLog index 45df782c..d3f3d6d8 100644 --- a/src/language/ChangeLog +++ b/src/language/ChangeLog @@ -1,3 +1,7 @@ +Sun Dec 3 11:59:10 2006 Ben Pfaff + + * syntax-file.c (read_syntax_file): Always read GETL_BATCH lines. + Wed Nov 29 19:35:44 WST 2006 John Darrington * command.c: Updated to reflect changed function names. 
diff --git a/src/language/control/ChangeLog b/src/language/control/ChangeLog index 17d3b510..ef5f63a4 100644 --- a/src/language/control/ChangeLog +++ b/src/language/control/ChangeLog @@ -1,3 +1,54 @@ +Sat Dec 2 17:01:59 2006 Ben Pfaff + + Significant cleanup to DO REPEAT. + + * repeat.c: (struct line_list) Rename struct repeat_line. Use + struct ll instead of explicit "next" pointer. Make "file_name" + const. Change "const char *line" to "struct substring text". Add + `syntax' member. Update all references. + (enum repeat_entry_type) Rename repeat_macro_type, update all + references. + (struct repeat_entry) Rename struct repeat_macro. Use struct ll + instead of explicit "next" pointer. Change "char[] id" to "struct + substring name". Change "char **replacement" to "struct substring + *replacements". Update all references. + (struct repeat_block) Use struct ll_list for lists of lines, + macros. Change "cur_line" to struct ll *. + (cmd_do_repeat) Don't bother adding an empty getl source at all. + This saves special-casing an empty source in repeat_read. + (parse_specification) Use new find_macro function. + (find_macro) New function. + (skip_indentor) Removed. + (recognize_keyword) Change interface, update callers. + (recognize_do_repeat) Ditto. + (recognize_end_repeat) Ditto. + (parse_lines) Use lex_preprocess_line to preprocess the input line + to check for DO REPEAT and END REPEAT, instead of coding it + inline. + (parse_ids) Need to make a copy of the array parsed by + parse_mixed_vars_pool, instead of using it verbatim, because we're + using struct substring now. + (add_replacement) Use struct substring in interface, instead of + const char *, and update all callers. + (find_substitution) Rewrite using substring, in terms of + find_macro. + (do_repeat_filter) Use struct substring to simplify code. + (current_line) New function. + +Sat Dec 2 16:40:12 2006 Ben Pfaff + + Fix lack of ->name and ->location in DO REPEAT's getl_interface. + See bug #15702. 
+ + * repeat.c: (cmd_do_repeat) Initialize name, location in + getl_interface. + (do_repeat_read) Adjust semantics of cur_line so that, after the + call, it points to the line just returned, instead of to the next + line to be returned. Thus, do_repeat_name and do_repeat_location + can use cur_line to obtain the info they need. + (do_repeat_name) New function. + (do_repeat_location) New function. + Thu Nov 30 22:01:27 2006 Ben Pfaff * repeat.c (do_repeat_read): Properly handle empty DO REPEAT...END diff --git a/src/language/control/repeat.c b/src/language/control/repeat.c index 951c0094..200b6bce 100644 --- a/src/language/control/repeat.c +++ b/src/language/control/repeat.c @@ -33,6 +33,7 @@ #include #include #include +#include #include #include #include @@ -44,43 +45,47 @@ #include "gettext.h" #define _(msgid) gettext (msgid) -/* Defines a list of lines used by DO REPEAT. */ -struct line_list +/* A line repeated by DO REPEAT. */ +struct repeat_line { - struct line_list *next; /* Next line. */ - char *file_name; /* File name. */ + struct ll ll; /* In struct repeat_block line_list. */ + const char *file_name; /* File name. */ int line_number; /* Line number. */ - char *line; /* Contents. */ + struct substring text; /* Contents. */ + enum getl_syntax syntax; /* Syntax mode. */ }; /* The type of substitution made for a DO REPEAT macro. */ -enum repeat_entry_type +enum repeat_macro_type { VAR_NAMES, OTHER }; /* Describes one DO REPEAT macro. */ -struct repeat_entry +struct repeat_macro { - struct repeat_entry *next; /* Next entry. */ - enum repeat_entry_type type; /* Types of replacements. */ - char id[LONG_NAME_LEN + 1]; /* Macro identifier. */ - char **replacement; /* Macro replacement. */ + struct ll ll; /* In struct repeat_block macros. */ + enum repeat_macro_type type; /* Types of replacements. */ + struct substring name; /* Macro name. */ + struct substring *replacements; /* Macro replacement. */ }; /* A DO REPEAT...END REPEAT block. 
*/ struct repeat_block { - struct getl_interface parent ; + struct getl_interface parent; struct pool *pool; /* Pool used for storage. */ struct dataset *ds; /* The dataset for this block */ - struct line_list *first_line; /* First line in line buffer. */ - struct line_list *cur_line; /* Current line in line buffer. */ + + struct ll_list lines; /* Lines in buffer. */ + struct ll *cur_line; /* Last line output. */ int loop_cnt; /* Number of loops. */ int loop_idx; /* Number of loops so far. */ - struct repeat_entry *macros; /* Pointer to macro table. */ + + struct ll_list macros; /* Table of macros. */ + bool print; /* Print lines as executed? */ }; @@ -88,20 +93,26 @@ static bool parse_specification (struct lexer *, struct repeat_block *); static bool parse_lines (struct lexer *, struct repeat_block *); static void create_vars (struct repeat_block *); +static struct repeat_macro *find_macro (struct repeat_block *, + struct substring name); + static int parse_ids (struct lexer *, const struct dictionary *dict, - struct repeat_entry *, struct pool *); + struct repeat_macro *, struct pool *); -static int parse_numbers (struct lexer *, struct repeat_entry *, +static int parse_numbers (struct lexer *, struct repeat_macro *, struct pool *); -static int parse_strings (struct lexer *, struct repeat_entry *, +static int parse_strings (struct lexer *, struct repeat_macro *, struct pool *); -static void do_repeat_filter (struct getl_interface *, struct string *); -static bool do_repeat_read (struct getl_interface *, struct string *); +static void do_repeat_filter (struct getl_interface *, + struct string *, enum getl_syntax); +static bool do_repeat_read (struct getl_interface *, + struct string *, enum getl_syntax *); static void do_repeat_close (struct getl_interface *); -static bool always_false (const struct getl_interface *i UNUSED); - +static bool always_false (const struct getl_interface *); +static const char *do_repeat_name (const struct getl_interface *); +static int 
do_repeat_location (const struct getl_interface *); int cmd_do_repeat (struct lexer *lexer, struct dataset *ds) @@ -110,21 +121,27 @@ cmd_do_repeat (struct lexer *lexer, struct dataset *ds) block = pool_create_container (struct repeat_block, pool); block->ds = ds; + ll_init (&block->lines); + block->cur_line = ll_null (&block->lines); + block->loop_idx = 0; + ll_init (&block->macros); if (!parse_specification (lexer, block) || !parse_lines (lexer, block)) goto error; create_vars (block); - - block->cur_line = NULL; - block->loop_idx = -1; block->parent.read = do_repeat_read; block->parent.close = do_repeat_close; block->parent.filter = do_repeat_filter; block->parent.interactive = always_false; + block->parent.name = do_repeat_name; + block->parent.location = do_repeat_location; - getl_include_source ( (struct getl_interface *) block); + if (!ll_is_empty (&block->lines)) + getl_include_source (&block->parent); + else + pool_destroy (block->pool); return CMD_SUCCESS; @@ -138,14 +155,12 @@ cmd_do_repeat (struct lexer *lexer, struct dataset *ds) static bool parse_specification (struct lexer *lexer, struct repeat_block *block) { - char first_name[LONG_NAME_LEN + 1]; + struct substring first_name; block->loop_cnt = 0; - block->macros = NULL; do { - struct repeat_entry *e; - struct repeat_entry *iter; + struct repeat_macro *macro; struct dictionary *dict = dataset_dict (block->ds); int count; @@ -156,20 +171,18 @@ parse_specification (struct lexer *lexer, struct repeat_block *block) msg (SW, _("Dummy variable name \"%s\" hides dictionary " "variable \"%s\"."), lex_tokid (lexer), lex_tokid (lexer)); - for (iter = block->macros; iter != NULL; iter = iter->next) - if (!strcasecmp (iter->id, lex_tokid (lexer))) + if (find_macro (block, ss_cstr (lex_tokid (lexer)))) { msg (SE, _("Dummy variable name \"%s\" is given twice."), lex_tokid (lexer)); return false; } - /* Make a new stand-in variable entry and link it into the - list. 
*/ - e = pool_alloc (block->pool, sizeof *e); - e->next = block->macros; - strcpy (e->id, lex_tokid (lexer)); - block->macros = e; + /* Make a new macro. */ + macro = pool_alloc (block->pool, sizeof *macro); + ss_alloc_substring_pool (¯o->name, ss_cstr (lex_tokid (lexer)), + block->pool); + ll_push_tail (&block->macros, ¯o->ll); /* Skip equals sign. */ lex_get (lexer); @@ -178,11 +191,11 @@ parse_specification (struct lexer *lexer, struct repeat_block *block) /* Get the details of the variable's possible values. */ if (lex_token (lexer) == T_ID) - count = parse_ids (lexer, dict, e, block->pool); + count = parse_ids (lexer, dict, macro, block->pool); else if (lex_is_number (lexer)) - count = parse_numbers (lexer, e, block->pool); + count = parse_numbers (lexer, macro, block->pool); else if (lex_token (lexer) == T_STRING) - count = parse_strings (lexer, e, block->pool); + count = parse_strings (lexer, macro, block->pool); else { lex_error (lexer, NULL); @@ -202,14 +215,17 @@ parse_specification (struct lexer *lexer, struct repeat_block *block) if (block->loop_cnt == 0) { block->loop_cnt = count; - strcpy (first_name, e->id); + first_name = macro->name; } else if (block->loop_cnt != count) { - msg (SE, _("Dummy variable \"%s\" had %d " - "substitutions, so \"%s\" must also, but %d " + msg (SE, _("Dummy variable \"%.*s\" had %d " + "substitutions, so \"%.*s\" must also, but %d " "were specified."), - first_name, block->loop_cnt, e->id, count); + (int) ss_length (first_name), ss_data (first_name), + block->loop_cnt, + (int) ss_length (macro->name), ss_data (macro->name), + count); return false; } @@ -220,58 +236,52 @@ parse_specification (struct lexer *lexer, struct repeat_block *block) return true; } -/* If KEYWORD appears beginning at CP, possibly preceded by white - space, returns a pointer to the character just after the - keyword. Otherwise, returns a null pointer. 
*/ -static const char * -recognize_keyword (const char *cp, const char *keyword) +/* Finds and returns a DO REPEAT macro with the given NAME, or + NULL if there is none. */ +static struct repeat_macro * +find_macro (struct repeat_block *block, struct substring name) { - const char *end; - - while (isspace ((unsigned char) *cp)) - cp++; + struct repeat_macro *macro; + + ll_for_each (macro, struct repeat_macro, ll, &block->macros) + if (ss_equals (macro->name, name)) + return macro; - end = lex_skip_identifier (cp); - if (end != cp - && lex_id_match_len (keyword, strlen (keyword), cp, end - cp)) - return end; - else - return NULL; + return NULL; } -/* Returns CP, advanced past a '+' or '-' if present. */ -static const char * -skip_indentor (const char *cp) +/* Advances LINE past white space and an identifier, if present. + Returns true if KEYWORD matches the identifier, false + otherwise. */ +static bool +recognize_keyword (struct substring *line, const char *keyword) { - if (*cp == '+' || *cp == '-') - cp++; - return cp; + struct substring id; + ss_ltrim (line, ss_cstr (CC_SPACES)); + ss_get_chars (line, lex_id_get_length (*line), &id); + return lex_id_match (ss_cstr (keyword), id); } /* Returns true if LINE contains a DO REPEAT command, false otherwise. */ static bool -recognize_do_repeat (const char *line) +recognize_do_repeat (struct substring line) { - const char *cp = recognize_keyword (skip_indentor (line), "do"); - return cp != NULL && recognize_keyword (cp, "repeat") != NULL; + return (recognize_keyword (&line, "do") + && recognize_keyword (&line, "repeat")); } /* Returns true if LINE contains an END REPEAT command, false otherwise. Sets *PRINT to true for END REPEAT PRINT, false otherwise. 
*/ static bool -recognize_end_repeat (const char *line, bool *print) +recognize_end_repeat (struct substring line, bool *print) { - const char *cp = recognize_keyword (skip_indentor (line), "end"); - if (cp == NULL) + if (!recognize_keyword (&line, "end") + || !recognize_keyword (&line, "repeat")) return false; - cp = recognize_keyword (cp, "repeat"); - if (cp == NULL) - return false; - - *print = recognize_keyword (cp, "print"); + *print = recognize_keyword (&line, "print"); return true; } @@ -281,72 +291,66 @@ static bool parse_lines (struct lexer *lexer, struct repeat_block *block) { char *previous_file_name; - struct line_list **last_line; int nesting_level; previous_file_name = NULL; - block->first_line = NULL; - last_line = &block->first_line; nesting_level = 0; for (;;) { - const char *cur_file_name = getl_source_name (); - int cur_line_number = getl_source_location (); - struct line_list *line; - struct string cur_line_copy; - bool dot; - - if (! lex_get_line_raw (lexer)) + const char *cur_file_name; + struct repeat_line *line; + struct string text; + enum getl_syntax syntax; + bool command_ends_before_line, command_ends_after_line; + + /* Retrieve an input line and make a copy of it. */ + if (!lex_get_line_raw (lexer, &syntax)) return false; + ds_init_string (&text, lex_entire_line_ds (lexer)); - /* If the current file has changed then record the fact. */ - if (cur_file_name && - (previous_file_name == NULL - || !strcmp (cur_file_name, previous_file_name)) - ) + /* Record file name. */ + cur_file_name = getl_source_name (); + if (cur_file_name != NULL && + (previous_file_name == NULL + || !strcmp (cur_file_name, previous_file_name))) previous_file_name = pool_strdup (block->pool, cur_file_name); - ds_init_string (&cur_line_copy, lex_entire_line_ds (lexer) ); - ds_rtrim (&cur_line_copy, ss_cstr (CC_SPACES)); - dot = ds_chomp (&cur_line_copy, get_endcmd ()); - - if (recognize_do_repeat (ds_cstr (&cur_line_copy))) + /* Create a line structure. 
*/ + line = pool_alloc (block->pool, sizeof *line); + line->file_name = previous_file_name; + line->line_number = getl_source_location (); + ss_alloc_substring_pool (&line->text, ds_ss (&text), block->pool); + line->syntax = syntax; + + /* Check whether the line contains a DO REPEAT or END + REPEAT command. */ + lex_preprocess_line (&text, syntax, + &command_ends_before_line, + &command_ends_after_line); + if (recognize_do_repeat (ds_ss (&text))) nesting_level++; - else if (recognize_end_repeat (ds_cstr (&cur_line_copy), &block->print)) + else if (recognize_end_repeat (ds_ss (&text), &block->print) + && nesting_level-- == 0) { - if (nesting_level-- == 0) - { - lex_discard_line (lexer); - ds_destroy (&cur_line_copy); - return true; - } + lex_discard_line (lexer); + return true; } - if (dot) - ds_put_char (&cur_line_copy, get_endcmd ()); - - line = *last_line = pool_alloc (block->pool, sizeof *line); - line->next = NULL; - line->file_name = previous_file_name; - line->line_number = cur_line_number; - line->line = pool_strdup (block->pool, ds_cstr (&cur_line_copy) ); - last_line = &line->next; + ds_destroy (&text); - ds_destroy (&cur_line_copy); + /* Add the line to the list. */ + ll_push_tail (&block->lines, &line->ll); } - - lex_discard_line (lexer); - return true; } /* Creates variables for the given DO REPEAT. */ static void create_vars (struct repeat_block *block) { - struct repeat_entry *iter; - - for (iter = block->macros; iter; iter = iter->next) - if (iter->type == VAR_NAMES) + struct repeat_macro *macro; + + ll_for_each (macro, struct repeat_macro, ll, &block->macros) + if (macro->type == VAR_NAMES) { int i; @@ -354,7 +358,9 @@ create_vars (struct repeat_block *block) { /* Ignore return value: if the variable already exists there is no harm done. 
*/ - dict_create_var (dataset_dict (block->ds), iter->replacement[i], 0); + char *var_name = ss_xstrdup (macro->replacements[i]); + dict_create_var (dataset_dict (block->ds), var_name, 0); + free (var_name); } } } @@ -362,37 +368,44 @@ create_vars (struct repeat_block *block) /* Parses a set of ids for DO REPEAT. */ static int parse_ids (struct lexer *lexer, const struct dictionary *dict, - struct repeat_entry *e, struct pool *pool) + struct repeat_macro *macro, struct pool *pool) { - size_t n = 0; - e->type = VAR_NAMES; - return parse_mixed_vars_pool (lexer, dict, pool, - &e->replacement, &n, PV_NONE) ? n : 0; + char **replacements; + size_t n, i; + + macro->type = VAR_NAMES; + if (!parse_mixed_vars_pool (lexer, dict, pool, &replacements, &n, PV_NONE)) + return 0; + + macro->replacements = pool_nalloc (pool, n, sizeof *macro->replacements); + for (i = 0; i < n; i++) + macro->replacements[i] = ss_cstr (replacements[i]); + return n; } -/* Adds STRING to E's list of replacements, which has *USED - elements and has room for *ALLOCATED. Allocates memory from - POOL. */ +/* Adds REPLACEMENT to MACRO's list of replacements, which has + *USED elements and has room for *ALLOCATED. Allocates memory + from POOL. */ static void -add_replacement (char *string, - struct repeat_entry *e, struct pool *pool, +add_replacement (struct substring replacement, + struct repeat_macro *macro, struct pool *pool, size_t *used, size_t *allocated) { if (*used == *allocated) - e->replacement = pool_2nrealloc (pool, e->replacement, allocated, - sizeof *e->replacement); - e->replacement[(*used)++] = string; + macro->replacements = pool_2nrealloc (pool, macro->replacements, allocated, + sizeof *macro->replacements); + macro->replacements[(*used)++] = replacement; } /* Parses a list of numbers for DO REPEAT. 
*/ static int -parse_numbers (struct lexer *lexer, struct repeat_entry *e, struct pool *pool) +parse_numbers (struct lexer *lexer, struct repeat_macro *macro, struct pool *pool) { size_t used = 0; size_t allocated = 0; - e->type = OTHER; - e->replacement = NULL; + macro->type = OTHER; + macro->replacements = NULL; do { @@ -421,9 +434,8 @@ parse_numbers (struct lexer *lexer, struct repeat_entry *e, struct pool *pool) b = a; for (i = a; i <= b; i++) - add_replacement (pool_asprintf (pool, "%ld", i), - e, pool, &used, &allocated); - + add_replacement (ss_cstr (pool_asprintf (pool, "%ld", i)), + macro, pool, &used, &allocated); lex_match (lexer, ','); } @@ -434,13 +446,13 @@ parse_numbers (struct lexer *lexer, struct repeat_entry *e, struct pool *pool) /* Parses a list of strings for DO REPEAT. */ int -parse_strings (struct lexer *lexer, struct repeat_entry *e, struct pool *pool) +parse_strings (struct lexer *lexer, struct repeat_macro *macro, struct pool *pool) { size_t used = 0; size_t allocated = 0; - e->type = OTHER; - e->replacement = NULL; + macro->type = OTHER; + macro->replacements = NULL; do { @@ -454,7 +466,7 @@ parse_strings (struct lexer *lexer, struct repeat_entry *e, struct pool *pool) string = lex_token_representation (lexer); pool_register (pool, free, string); - add_replacement (string, e, pool, &used, &allocated); + add_replacement (ss_cstr (string), macro, pool, &used, &allocated); lex_get (lexer); lex_match (lexer, ','); @@ -471,59 +483,52 @@ cmd_end_repeat (struct lexer *lexer UNUSED, struct dataset *ds UNUSED) return CMD_CASCADING_FAILURE; } -/* Finds a DO REPEAT macro with name MACRO_NAME and returns the - appropriate subsitution if found, or NULL if not. */ -static char * -find_substitution (struct repeat_block *block, const char *name, size_t length) +/* Finds a DO REPEAT macro with the given NAME and returns the + appropriate substitution if found, or NAME otherwise. 
*/ +static struct substring +find_substitution (struct repeat_block *block, struct substring name) { - struct repeat_entry *e; - - for (e = block->macros; e; e = e->next) - if (!memcasecmp (e->id, name, length) && strlen (e->id) == length) - return e->replacement[block->loop_idx]; - - return NULL; + struct repeat_macro *macro = find_macro (block, name); + return macro ? macro->replacements[block->loop_idx] : name; } /* Makes appropriate DO REPEAT macro substitutions within the repeated lines. */ static void -do_repeat_filter (struct getl_interface *block_, struct string *line) +do_repeat_filter (struct getl_interface *block_, + struct string *line, enum getl_syntax syntax UNUSED) { struct repeat_block *block = (struct repeat_block *) block_; - bool in_apos, in_quote; - char *cp; + bool in_apos, in_quote, dot; + struct substring input; struct string output; - bool dot; + int c; ds_init_empty (&output); /* Strip trailing whitespace, check for & remove terminal dot. */ - while (isspace (ds_last (line))) - ds_truncate (line, ds_length (line) - 1); + ds_rtrim (line, ss_cstr (CC_SPACES)); dot = ds_chomp (line, get_endcmd ()); + input = ds_ss (line); in_apos = in_quote = false; - for (cp = ds_cstr (line); cp < ds_end (line); ) + while ((c = ss_first (input)) != EOF) { - if (*cp == '\'' && !in_quote) + if (c == '\'' && !in_quote) in_apos = !in_apos; - else if (*cp == '"' && !in_apos) + else if (c == '"' && !in_apos) in_quote = !in_quote; - if (in_quote || in_apos || !lex_is_id1 (*cp)) - ds_put_char (&output, *cp++); + if (in_quote || in_apos || !lex_is_id1 (c)) + { + ds_put_char (&output, c); + ss_advance (&input, 1); + } else { - const char *start = cp; - char *end = lex_skip_identifier (start); - const char *substitution = find_substitution (block, - start, end - start); - if (substitution != NULL) - ds_put_cstr (&output, substitution); - else - ds_put_substring (&output, ss_buffer (start, end - start)); - cp = end; + struct substring id; + ss_get_chars (&input, 
lex_id_get_length (input), &id); + ds_put_substring (&output, find_substitution (block, id)); } } if (dot) @@ -533,29 +538,38 @@ do_repeat_filter (struct getl_interface *block_, struct string *line) ds_destroy (&output); } -/* Function called by getl to read a line. - Puts the line in OUTPUT, sets the file name in *FILE_NAME and - line number in *LINE_NUMBER. Returns true if a line was - obtained, false if the source is exhausted. */ +static struct repeat_line * +current_line (const struct getl_interface *interface) +{ + struct repeat_block *block = (struct repeat_block *) interface; + return (block->cur_line != ll_null (&block->lines) + ? ll_data (block->cur_line, struct repeat_line, ll) + : NULL); +} + +/* Function called by getl to read a line. Puts the line in + OUTPUT and its syntax mode in *SYNTAX. Returns true if a line + was obtained, false if the source is exhausted. */ static bool -do_repeat_read (struct getl_interface *b, struct string *output) +do_repeat_read (struct getl_interface *interface, + struct string *output, enum getl_syntax *syntax) { - struct repeat_block *block = (struct repeat_block *) b; - struct line_list *line; + struct repeat_block *block = (struct repeat_block *) interface; + struct repeat_line *line; - if (block->cur_line == NULL) + block->cur_line = ll_next (block->cur_line); + if (block->cur_line == ll_null (&block->lines)) { block->loop_idx++; if (block->loop_idx >= block->loop_cnt) return false; - block->cur_line = block->first_line; - if (block->cur_line == NULL) - return false; + + block->cur_line = ll_head (&block->lines); } - line = block->cur_line; - ds_assign_cstr (output, line->line); - block->cur_line = line->next; + line = current_line (interface); + ds_assign_substring (output, line->text); + *syntax = line->syntax; return true; } @@ -574,3 +588,21 @@ always_false (const struct getl_interface *i UNUSED) { return false; } + +/* Returns the name of the source file from which the previous + line was originally obtained, or 
a null pointer if none. */ +static const char * +do_repeat_name (const struct getl_interface *interface) +{ + struct repeat_line *line = current_line (interface); + return line ? line->file_name : NULL; +} + +/* Returns the line number in the source file from which the + previous line was originally obtained, or -1 if none. */ +static int +do_repeat_location (const struct getl_interface *interface) +{ + struct repeat_line *line = current_line (interface); + return line ? line->line_number : -1; +} diff --git a/src/language/data-io/data-reader.c b/src/language/data-io/data-reader.c index 49ec7789..7b269065 100644 --- a/src/language/data-io/data-reader.c +++ b/src/language/data-io/data-reader.c @@ -172,7 +172,7 @@ read_inline_record (struct dfm_reader *r) prompt_set_style (PROMPT_DATA); } - if (!lex_get_line_raw (r->lexer)) + if (!lex_get_line_raw (r->lexer, NULL)) { msg (SE, _("Unexpected end-of-file while reading data in BEGIN " "DATA. This probably indicates " diff --git a/src/language/data-io/get.c b/src/language/data-io/get.c index be561416..3aa06039 100644 --- a/src/language/data-io/get.c +++ b/src/language/data-io/get.c @@ -835,7 +835,8 @@ cmd_match_files (struct lexer *lexer, struct dataset *ds) lex_match (lexer, '/'); while (lex_token (lexer) == T_ID - && (lex_id_match ("FILE", lex_tokid (lexer)) || lex_id_match ("TABLE", lex_tokid (lexer)))) + && (lex_id_match (ss_cstr ("FILE"), ss_cstr (lex_tokid (lexer))) + || lex_id_match (ss_cstr ("TABLE"), ss_cstr (lex_tokid (lexer))))) { struct mtf_file *file = xmalloc (sizeof *file); diff --git a/src/language/dictionary/sys-file-info.c b/src/language/dictionary/sys-file-info.c index 44278060..d0f2147f 100644 --- a/src/language/dictionary/sys-file-info.c +++ b/src/language/dictionary/sys-file-info.c @@ -221,7 +221,8 @@ cmd_display (struct lexer *lexer, struct dataset *ds) sorted = lex_match_id (lexer, "SORTED"); for (cp = sbc; *cp; cp++) - if (lex_token (lexer) == T_ID && lex_id_match (*cp, lex_tokid (lexer))) + if 
(lex_token (lexer) == T_ID + && lex_id_match (ss_cstr (*cp), ss_cstr (lex_tokid (lexer)))) { lex_get (lexer); break; diff --git a/src/language/lexer/ChangeLog b/src/language/lexer/ChangeLog index ebbf2544..db33299a 100644 --- a/src/language/lexer/ChangeLog +++ b/src/language/lexer/ChangeLog @@ -1,3 +1,30 @@ +Sat Dec 2 21:19:50 2006 Ben Pfaff + + General clean-up. + + * lexer.c: (lex_token_name) Don't use a static buffer or, rather, + use a separate static buffer for each possible answer. + (lex_token_representation) Now use lex_token_name as building + block. Previously this broke lex_force_match because it uses + lex_token_name in a call to lex_error, which in turn uses + lex_token_representation. + (lex_force_match_id) Use lex_match_id as building block, to + simplify. + (parse_id) Rewrite to work with modified lex_id_to_token and + lex_id_get_length. The computation of rest_of_line is a bit of an + abomination but it will get fixed later. + +Sat Dec 2 20:16:50 2006 Ben Pfaff + + * lexer.c (struct lexer): Change function signature for + `read_line' to take an "enum getl_syntax *" instead of "bool *". + (lex_create) Ditto, for argument. + (lex_preprocess_line) New function. + (lex_get_line_raw) New arg, to allow caller to obtain getl_syntax + of the line read. + (lex_get_line) Use lex_get_line_raw and lex_preprocess_line to + simplify. + Sun Nov 19 09:20:42 2006 Ben Pfaff * range-parser.c (parse_num_range): Because data_in takes an enum diff --git a/src/language/lexer/lexer.c b/src/language/lexer/lexer.c index 9c6063fd..907a77ce 100644 --- a/src/language/lexer/lexer.c +++ b/src/language/lexer/lexer.c @@ -49,7 +49,7 @@ struct lexer { struct string line_buffer; - bool (*read_line) (struct string *, bool *); + bool (*read_line) (struct string *, enum getl_syntax *); int token; /* Current token. */ double tokval; /* T_POS_NUM, T_NEG_NUM: the token's value. */ @@ -93,7 +93,7 @@ static void dump_token (void); /* Initializes the lexer. 
*/ struct lexer * -lex_create (bool (*read_line_func) (struct string *, bool *)) +lex_create (bool (*read_line_func) (struct string *, enum getl_syntax *)) { struct lexer *lexer = xzalloc (sizeof (*lexer)); @@ -389,12 +389,17 @@ lex_get (struct lexer *lexer) static int parse_id (struct lexer *lexer) { - const char *start = lexer->prog; - lexer->prog = lex_skip_identifier (start); - - ds_put_substring (&lexer->tokstr, ss_buffer (start, lexer->prog - start)); + struct substring rest_of_line + = ss_substr (ds_ss (&lexer->line_buffer), + ds_pointer_to_position (&lexer->line_buffer, lexer->prog), + SIZE_MAX); + struct substring id = ss_head (rest_of_line, + lex_id_get_length (rest_of_line)); + lexer->prog += ss_length (id); + + ds_assign_substring (&lexer->tokstr, id); str_copy_trunc (lexer->tokid, sizeof lexer->tokid, ds_cstr (&lexer->tokstr)); - return lex_id_to_token (ds_cstr (&lexer->tokstr), ds_length (&lexer->tokstr)); + return lex_id_to_token (id); } /* Reports an error to the effect that subcommand SBC may only be @@ -522,7 +527,8 @@ lex_match (struct lexer *lexer, int t) bool lex_match_id (struct lexer *lexer, const char *s) { - if (lexer->token == T_ID && lex_id_match (s, lexer->tokid)) + if (lexer->token == T_ID + && lex_id_match (ss_cstr (s), ss_cstr (lexer->tokid))) { lex_get (lexer); return true; @@ -553,11 +559,8 @@ lex_match_int (struct lexer *lexer, int x) bool lex_force_match_id (struct lexer *lexer, const char *s) { - if (lexer->token == T_ID && lex_id_match (s, lexer->tokid)) - { - lex_get (lexer); - return true; - } + if (lex_match_id (lexer, s)) + return true; else { lex_error (lexer, _("expecting `%s'"), s); @@ -693,7 +696,7 @@ lex_put_back (struct lexer *lexer, int t) void lex_put_back_id (struct lexer *lexer, const char *id) { - assert (lex_id_to_token (id, strlen (id)) == T_ID); + assert (lex_id_to_token (ss_cstr (id)) == T_ID); save_token (lexer); lexer->token = T_ID; ds_assign_cstr (&lexer->tokstr, id); @@ -808,12 +811,40 @@ strip_comments 
(struct string *string) } } -/* Reads a line, without performing any preprocessing */ +/* Prepares LINE, which is subject to the given SYNTAX rules, for + tokenization by stripping comments and determining whether it + is the beginning or end of a command and storing into + *LINE_STARTS_COMMAND and *LINE_ENDS_COMMAND appropriately. */ +void +lex_preprocess_line (struct string *line, + enum getl_syntax syntax, + bool *line_starts_command, + bool *line_ends_command) +{ + strip_comments (line); + ds_rtrim (line, ss_cstr (CC_SPACES)); + *line_ends_command = (ds_chomp (line, get_endcmd ()) + || (ds_is_empty (line) && get_nulline ())); + *line_starts_command = false; + if (syntax == GETL_BATCH) + { + int first = ds_first (line); + *line_starts_command = !isspace (first); + if (first == '+' || first == '-') + *ds_data (line) = ' '; + } +} + +/* Reads a line, without performing any preprocessing. + Sets *SYNTAX, if SYNTAX is non-null, to the line's syntax + mode. */ bool -lex_get_line_raw (struct lexer *lexer) +lex_get_line_raw (struct lexer *lexer, enum getl_syntax *syntax) { - bool dummy; - return lexer->read_line (&lexer->line_buffer, &dummy); + enum getl_syntax dummy; + bool ok = lexer->read_line (&lexer->line_buffer, + syntax != NULL ? syntax : &dummy); + return ok; } /* Reads a line for use by the tokenizer, and preprocesses it by @@ -822,53 +853,39 @@ lex_get_line_raw (struct lexer *lexer) bool lex_get_line (struct lexer *lexer) { - struct string *line = &lexer->line_buffer; - bool interactive; + bool line_starts_command; + enum getl_syntax syntax; - if (!lexer->read_line (line, &interactive)) + if (!lex_get_line_raw (lexer, &syntax)) return false; - strip_comments (line); - ds_rtrim (line, ss_cstr (CC_SPACES)); - - /* Check for and remove terminal dot. */ - lexer->dot = (ds_chomp (line, get_endcmd ()) - || (ds_is_empty (line) && get_nulline ())); - - /* Strip leading indentors or insert a terminal dot (unless the - line was obtained interactively). 
*/ - if (!interactive) - { - int first = ds_first (line); - - if (first == '+' || first == '-') - *ds_data (line) = ' '; - else if (first != EOF && !isspace (first)) - lexer->put_token = '.'; - } - - lexer->prog = ds_cstr (line); + lex_preprocess_line (&lexer->line_buffer, syntax, + &line_starts_command, &lexer->dot); + if (line_starts_command) + lexer->put_token = '.'; + lexer->prog = ds_cstr (&lexer->line_buffer); return true; } /* Token names. */ -/* Returns the name of a token in a static buffer. */ +/* Returns the name of a token. */ const char * lex_token_name (int token) { - if (token >= T_FIRST_KEYWORD && token <= T_LAST_KEYWORD) - return keywords[token - T_FIRST_KEYWORD]; - - if (token < 256) + if (lex_is_keyword (token)) + return lex_id_name (token); + else if (token < 256) { - static char t[2]; - t[0] = token; - return t; + static char t[256][2]; + char *s = t[token]; + s[0] = token; + s[1] = '\0'; + return s; } - - NOT_REACHED (); + else + NOT_REACHED (); } /* Returns an ASCII representation of the current token as a @@ -934,15 +951,7 @@ lex_token_representation (struct lexer *lexer) return xstrdup ("**"); default: - if (lexer->token >= T_FIRST_KEYWORD && lexer->token <= T_LAST_KEYWORD) - return xstrdup (keywords [lexer->token - T_FIRST_KEYWORD]); - else - { - token_rep = xmalloc (2); - token_rep[0] = lexer->token; - token_rep[1] = '\0'; - return token_rep; - } + return xstrdup (lex_token_name (lexer->token)); } NOT_REACHED (); @@ -1225,7 +1234,7 @@ dump_token (struct lexer *lexer) break; default: - if (lexer->token >= T_FIRST_KEYWORD && lexer->token <= T_LAST_KEYWORD) + if (lex_is_keyword (token)) fprintf (stderr, "KEYWORD\t%s\n", lex_token_name (token)); else fprintf (stderr, "PUNCT\t%c\n", lexer->token); diff --git a/src/language/lexer/lexer.h b/src/language/lexer/lexer.h index 792788c5..509173d9 100644 --- a/src/language/lexer/lexer.h +++ b/src/language/lexer/lexer.h @@ -20,17 +20,17 @@ #if !lexer_h #define lexer_h 1 -#include #include #include 
-#include - #include +#include +#include +#include -struct lexer ; +struct lexer; /* Initialization. */ -struct lexer * lex_create (bool (*)(struct string *, bool*)); +struct lexer * lex_create (bool (*)(struct string *, enum getl_syntax *)); void lex_destroy (struct lexer *); @@ -71,12 +71,15 @@ void lex_put_back_id (struct lexer *, const char *tokid); const char *lex_entire_line (struct lexer *); const struct string *lex_entire_line_ds (struct lexer *); const char *lex_rest_of_line (struct lexer *, int *end_dot); +void lex_preprocess_line (struct string *, enum getl_syntax, + bool *line_starts_command, + bool *line_ends_command); void lex_discard_line (struct lexer *); void lex_discard_rest_of_command (struct lexer *); /* Weird line reading functions. */ bool lex_get_line (struct lexer *); -bool lex_get_line_raw (struct lexer *); +bool lex_get_line_raw (struct lexer *, enum getl_syntax *); /* Token names. */ const char *lex_token_name (int); diff --git a/src/language/syntax-file.c b/src/language/syntax-file.c index e41466ce..4f667c12 100644 --- a/src/language/syntax-file.c +++ b/src/language/syntax-file.c @@ -78,7 +78,8 @@ line_number (const struct getl_interface *s) /* Reads a line from syntax file source S into LINE. Returns true if successful, false at end of file. 
*/ bool -read_syntax_file (struct getl_interface *s, struct string *line) +read_syntax_file (struct getl_interface *s, + struct string *line, enum getl_syntax *syntax) { struct syntax_file_source *sfs = (struct syntax_file_source *) s; @@ -114,6 +115,7 @@ read_syntax_file (struct getl_interface *s, struct string *line) if (get_echo ()) tab_output_text (TAB_LEFT | TAB_FIX, ds_cstr (line)); + *syntax = GETL_BATCH; return true; } diff --git a/src/language/syntax-file.h b/src/language/syntax-file.h index c09af216..879b1b92 100644 --- a/src/language/syntax-file.h +++ b/src/language/syntax-file.h @@ -21,12 +21,12 @@ #define SYNTAX_FILE 1 #include +#include struct string; -struct getl_interface; -struct getl_source; -bool read_syntax_file (struct getl_interface *s, struct string *line); +bool read_syntax_file (struct getl_interface *s, + struct string *line, enum getl_syntax *syntax); /* Creates a syntax file source with file name FN. */ struct getl_interface * create_syntax_file_source (const char *fn) ; diff --git a/src/language/utilities/set.q b/src/language/utilities/set.q index d6252cf4..e47993e0 100644 --- a/src/language/utilities/set.q +++ b/src/language/utilities/set.q @@ -369,7 +369,7 @@ stc_custom_blanks (struct lexer *lexer, struct cmd_set *cmd UNUSED, void *aux UNUSED) { lex_match (lexer, '='); - if ((lex_token (lexer) == T_ID && lex_id_match ("SYSMIS", lex_tokid (lexer)))) + if (lex_match_id (lexer, "SYSMIS")) { lex_get (lexer); set_blanks (SYSMIS); diff --git a/src/libpspp/ChangeLog b/src/libpspp/ChangeLog index 6e09dfb2..c561caf6 100644 --- a/src/libpspp/ChangeLog +++ b/src/libpspp/ChangeLog @@ -1,3 +1,28 @@ +Sun Dec 3 11:36:10 2006 Ben Pfaff + + * str.h (SS_LITERAL_INITIALIZER): Cast the string literal to "char + *". This normally does nothing but when GCC's -Wwrite-strings is + used it fixes a warning that otherwise can't be avoided. + +Sun Dec 3 11:35:35 2006 Ben Pfaff + + * str.c (ss_alloc_substring_pool): New function. 
+ (ss_alloc_uninit_pool) New function. + +Sun Dec 3 11:28:06 2006 Ben Pfaff + + * getl.h: (enum getl_syntax) New enumeration to distinguish + between "batch" and "interactive" in a clearer way than a bool. + (struct getl_interface) Add an arg to "read" to return the + intended syntax mode. Add an arg to "filter" to specify the + syntax mode of the line to filter. + + * getl.c (do_read_line): Instead of returning the syntax type of + the line read based on whether the source itself is interactive, + return it based on whether the line itself should be treated as + having batch or interactive syntax. Also, adapt interface to the + new interfaces of lex_init() and getl_interface. + Wed Nov 29 19:35:44 WST 2006 John Darrington * getl.c getl.h: New files. Created interface from base of diff --git a/src/libpspp/getl.c b/src/libpspp/getl.c index 1c18733d..200e07e3 100644 --- a/src/libpspp/getl.c +++ b/src/libpspp/getl.c @@ -201,26 +201,21 @@ getl_uninitialize (void) /* Reads a single line into LINE. Returns true when a line has been read, false at end of input. - If INTERACTIVE is non-null, then when true is returned - *INTERACTIVE will be set to true if the line was obtained - interactively, false otherwise. */ + On success, sets *SYNTAX to the style of the syntax read. 
*/ bool -do_read_line (struct string *line, bool *interactive) +do_read_line (struct string *line, enum getl_syntax *syntax) { while (!ll_is_empty (&sources)) { struct getl_source *s = current_source (&sources); ds_clear (line); - if (s->interface->read (s->interface, line)) + if (s->interface->read (s->interface, line, syntax)) { - if (interactive != NULL) - *interactive = s->interface->interactive (s->interface); - while (s) { if (s->interface->filter) - s->interface->filter (s->interface, line); + s->interface->filter (s->interface, line, *syntax); s = s->included_from; } diff --git a/src/libpspp/getl.h b/src/libpspp/getl.h index 34c9b912..27faee64 100644 --- a/src/libpspp/getl.h +++ b/src/libpspp/getl.h @@ -27,22 +27,42 @@ struct string; struct getl_source; +/* Syntax rules that apply to a given source line. */ +enum getl_syntax + { + /* Each line that begins in column 1 starts a new command. A + `+' or `-' in column 1 is ignored to allow visual + indentation of new commands. Continuation lines must be + indented from the left margin. A period at the end of a + line does end a command, but it is optional. */ + GETL_BATCH, + + /* Each command must end in a period or in a blank line. */ + GETL_INTERACTIVE + }; /* An abstract base class for objects which act as line buffers for the PSPP. Ie anything which might contain content for the lexer */ struct getl_interface { - /* Returns true, if the interface is interactive */ + /* Returns true if the interface is interactive, that is, if + it prompts a human user. This property is independent of + the syntax mode returned by the read member function. */ bool (*interactive) (const struct getl_interface *); - /* Read a line from the interface */ - bool (*read) (struct getl_interface *, struct string *); + /* Read a line and its intended syntax mode from the interface. + Returns true if successful, false on failure or at end of + input.
*/ + bool (*read) (struct getl_interface *, + struct string *, enum getl_syntax *); /* Close and destroy the interface */ void (*close) (struct getl_interface *); - /* Filter for current and all included sources. May be NULL */ - void (*filter) (struct getl_interface *, struct string *line); + /* Filter for current and all included sources, which may + modify the line. Usually null. */ + void (*filter) (struct getl_interface *, + struct string *line, enum getl_syntax); /* Returns the name of the source */ const char * (*name) (const struct getl_interface *); @@ -63,7 +83,7 @@ bool getl_is_interactive (void); bool getl_read_line (bool *interactive); -bool do_read_line (struct string *line, bool *interactive); +bool do_read_line (struct string *line, enum getl_syntax *syntax); void getl_append_source (struct getl_interface *s) ; void getl_include_source (struct getl_interface *s) ; diff --git a/src/libpspp/str.c b/src/libpspp/str.c index 5a554b7b..6840393a 100644 --- a/src/libpspp/str.c +++ b/src/libpspp/str.c @@ -358,6 +358,25 @@ ss_alloc_uninit (struct substring *new, size_t cnt) new->length = cnt; } +/* Makes a pool_alloc_unaligned()'d copy of the contents of OLD + in POOL, and stores it in NEW. */ +void +ss_alloc_substring_pool (struct substring *new, struct substring old, + struct pool *pool) +{ + new->string = pool_alloc_unaligned (pool, old.length); + new->length = old.length; + memcpy (new->string, old.string, old.length); +} + +/* Allocates room for a CNT-character string in NEW in POOL. */ +void +ss_alloc_uninit_pool (struct substring *new, size_t cnt, struct pool *pool) +{ + new->string = pool_alloc_unaligned (pool, cnt); + new->length = cnt; +} + /* Frees the string that SS points to. 
*/ void ss_dealloc (struct substring *ss) diff --git a/src/libpspp/str.h b/src/libpspp/str.h index 81cda125..00d8e31f 100644 --- a/src/libpspp/str.h +++ b/src/libpspp/str.h @@ -76,7 +76,8 @@ struct substring }; #define SS_EMPTY_INITIALIZER {NULL, 0} -#define SS_LITERAL_INITIALIZER(LITERAL) {LITERAL, (sizeof LITERAL) - 1} +#define SS_LITERAL_INITIALIZER(LITERAL) \ + {(char *) LITERAL, (sizeof LITERAL) - 1} /* Constructors. These functions do not allocate any memory, so the substrings @@ -90,8 +91,12 @@ struct substring ss_tail (struct substring, size_t); /* Constructors and destructor that allocate and deallocate memory. */ +struct pool; void ss_alloc_substring (struct substring *, struct substring); void ss_alloc_uninit (struct substring *, size_t); +void ss_alloc_substring_pool (struct substring *, struct substring, + struct pool *); +void ss_alloc_uninit_pool (struct substring *, size_t, struct pool *); void ss_dealloc (struct substring *); /* Mutators. diff --git a/src/ui/terminal/ChangeLog b/src/ui/terminal/ChangeLog index 98ce96c7..cebce364 100644 --- a/src/ui/terminal/ChangeLog +++ b/src/ui/terminal/ChangeLog @@ -1,3 +1,8 @@ +Sun Dec 3 11:57:00 2006 Ben Pfaff + + * read-line.c (read_interactive): Always read GETL_INTERACTIVE + lines. + Thu Nov 16 20:46:35 WST 2006 John Darrington * main.c: Connect debugger on errors. diff --git a/src/ui/terminal/read-line.c b/src/ui/terminal/read-line.c index 97427c6c..05b6150f 100644 --- a/src/ui/terminal/read-line.c +++ b/src/ui/terminal/read-line.c @@ -99,11 +99,13 @@ readln_uninitialize (void) static bool -read_interactive (struct getl_interface *s, struct string *line) +read_interactive (struct getl_interface *s, + struct string *line, enum getl_syntax *syntax) { struct readln_source *is = (struct readln_source *) s ; + *syntax = GETL_INTERACTIVE; return is->interactive_func (line, prompt_get_style ()); } -- 2.30.2