From 1339492699ce7e12c9bf9fa17f9d60a66024cbd1 Mon Sep 17 00:00:00 2001 From: Ben Pfaff Date: Mon, 2 May 2005 06:21:18 +0000 Subject: [PATCH] New implementation of long variable names. Each variable has a "normal" name, which may be up to 64 bytes long and which is used for all normal operations. Variables may have a "short" name, which is limited to 8 bytes and used only for system and portable file input and output. Make tokid case-preserving. Update most uses of tokid to treat it case-insensitively. Update many commands to deal with long variable names. --- doc/ChangeLog | 5 + doc/configuring.texi | 18 +- doc/data-io.texi | 4 +- doc/expressions.texi | 8 +- doc/language.texi | 532 ++++++++---------------- doc/q2c.texi | 2 +- doc/transformation.texi | 14 +- doc/utilities.texi | 2 +- src/ChangeLog | 201 ++++++++++ src/aggregate.c | 19 +- src/algorithm.c | 30 ++ src/algorithm.h | 7 + src/autorecode.c | 2 +- src/command.c | 10 +- src/compute.c | 8 +- src/count.c | 6 +- src/data-in.c | 2 +- src/data-list.c | 6 +- src/descript.c | 11 +- src/dictionary.c | 556 +++++++++----------------- src/dictionary.h | 20 +- src/expressions/parse.c | 14 +- src/file-handle.q | 13 +- src/file-type.c | 4 +- src/flip.c | 37 +- src/format.c | 2 +- src/get.c | 23 +- src/hash.c | 139 +++---- src/hash.h | 1 + src/lexer.c | 28 +- src/list.q | 16 +- src/loop.c | 4 +- src/matrix-data.c | 9 +- src/modify-vars.c | 6 +- src/pfm-read.c | 24 +- src/pfm-write.c | 15 +- src/pfm-write.h | 3 +- src/recode.c | 2 +- src/repeat.c | 2 +- src/sfm-read.c | 86 +++- src/sfm-write.c | 70 ++-- src/sfm-write.h | 3 +- src/str.c | 22 +- src/str.h | 1 + src/sysfile-info.c | 2 +- src/t-test.q | 15 - src/var-display.c | 26 +- src/var.h | 47 ++- src/vars-atr.c | 75 +++- src/vars-prs.c | 78 +--- src/vector.c | 15 +- tests/ChangeLog | 5 + tests/bugs/agg-crash-2.sh | 4 +- tests/bugs/compute-lv.sh | 22 +- tests/bugs/get.sh | 2 +- tests/bugs/multipass.sh | 2 +- tests/bugs/random.sh | 2 +- tests/bugs/recode-copy-bug-2.stat | 2 
+- tests/bugs/t-test-alpha.sh | 2 +- tests/bugs/temporary.sh | 2 +- tests/command/aggregate.sh | 150 +++---- tests/command/autorecod.sh | 8 +- tests/command/count.sh | 4 +- tests/command/examine-extremes.sh | 2 +- tests/command/examine-percentiles.sh | 2 +- tests/command/examine.sh | 2 +- tests/command/file-label.sh | 2 +- tests/command/filter.sh | 4 +- tests/command/flip.sh | 2 +- tests/command/import-export.sh | 2 +- tests/command/lag.sh | 8 +- tests/command/list.sh | 2 +- tests/command/longvars.sh | 31 +- tests/command/loop.sh | 16 +- tests/command/match-files.sh | 12 +- tests/command/oneway-with-splits.sh | 2 +- tests/command/oneway.sh | 2 +- tests/command/rename.sh | 40 +- tests/command/sample.sh | 2 +- tests/command/split-file.sh | 2 +- tests/command/sysfiles.sh | 20 +- tests/command/t-test-1-indep-val.sh | 2 +- tests/command/t-test-1s.sh | 2 +- tests/command/t-test-groups.sh | 2 +- tests/command/t-test-pairs.sh | 2 +- tests/command/tabs.sh | 2 +- tests/command/trimmed-mean.sh | 2 +- tests/command/use.sh | 2 +- tests/stats/descript-basic.sh | 2 +- tests/stats/descript-mean-bug.sh | 2 +- tests/stats/descript-missing.sh | 2 +- tests/stats/percentiles-compatible.sh | 2 +- tests/stats/percentiles-enhanced.sh | 10 +- 93 files changed, 1295 insertions(+), 1340 deletions(-) diff --git a/doc/ChangeLog b/doc/ChangeLog index 23f8cc9a..54f85c32 100644 --- a/doc/ChangeLog +++ b/doc/ChangeLog @@ -1,3 +1,8 @@ +Sun May 1 23:20:42 2005 Ben Pfaff + + * language.texi: Revised lots of text to catch up with changes + that have been in for a long time, and for style. + Sun May 1 15:17:42 WST 2005 John Darrington * configuration.texi: Removed manpage(x) style references, because diff --git a/doc/configuring.texi b/doc/configuring.texi index eedc11d4..44de42f9 100644 --- a/doc/configuring.texi +++ b/doc/configuring.texi @@ -183,7 +183,7 @@ cannot be broken up, unless they are spliced together with a trailing backslash, as described below. 
@item -Before anything else is done, trailing whitespace is removed. +Before anything else is done, trailing white space is removed. @item When a line ends in a backslash (@samp{\}), the backslash is removed, @@ -191,7 +191,7 @@ and the next line is read and appended to the current line. @itemize @minus @item -Whitespace preceding the backslash is retained. +White space preceding the backslash is retained. @item This rule continues to be applied until the line read does not end in a @@ -222,7 +222,7 @@ Line splicing takes place before comment removal. @end itemize @item -Blank lines, and lines that contain only whitespace, are ignored. +Blank lines, and lines that contain only white space, are ignored. @end itemize @node Environment variables, Output devices, Configuration files, Configuration @@ -646,11 +646,11 @@ The lines in @file{devices} are distinguished in the following manner: @enumerate @item -Leading whitespace is removed. +Leading white space is removed. @item If the resulting line begins with the exact string @code{define}, -followed by one or more whitespace characters, the line is processed as +followed by one or more white space characters, the line is processed as a macro definition. @item @@ -681,7 +681,7 @@ delimiters between tokens and tokens in themselves. The second type is an identifier or string token. Identifiers and strings are equivalent after tokenization, though they are written differently. An identifier is any string of characters other than -whitespace or equals sign. +white space or equals sign. A string is introduced by a single- or double-quote character (@samp{'} or @samp{"}) and, in general, continues until the next occurrence of @@ -733,7 +733,7 @@ hexadecimal value specified. Any number of hex digits is read and interpreted; only the lower 8 bits are used. @end table -Tokens, outside of quoted strings, are delimited by whitespace or equals +Tokens, outside of quoted strings, are delimited by white space or equals signs. 
@node PostScript driver class, ASCII driver class, Output devices, Configuration @@ -1000,13 +1000,13 @@ used, or @var{double}, in which case double lines are used. Default: @item line-gutter=@var{dimension} -Sets the line gutter, which is the amount of whitespace on either side +Sets the line gutter, which is the amount of white space on either side of lines that border text or graphics objects. @xref{Dimensions}. Default: @code{0.5pt}. @item line-spacing=@var{dimension} -Sets the line spacing, which is the amount of whitespace that separates +Sets the line spacing, which is the amount of white space that separates lines that are side by side, as in a double line. Default: @code{0.5pt}. diff --git a/doc/data-io.texi b/doc/data-io.texi index 2feac6b2..c1c1bb87 100644 --- a/doc/data-io.texi +++ b/doc/data-io.texi @@ -58,8 +58,8 @@ data in a PSPP syntax file. @cmd{DATA LIST} or another input procedure must be used before @cmd{BEGIN DATA} (@pxref{DATA LIST}). @cmd{BEGIN DATA} and @cmd{END DATA} must be used together. @cmd{END DATA} must appear by itself on a single line, with no leading -whitespace and exactly one space between the words @code{END} and -@code{DATA}, followed immediately by the terminal dot, like this: +white space and exactly one space between the words @code{END} and +@code{DATA}, like this: @example END DATA. diff --git a/doc/expressions.texi b/doc/expressions.texi index 8ed76b2c..f6cc4f13 100644 --- a/doc/expressions.texi +++ b/doc/expressions.texi @@ -583,9 +583,9 @@ if @var{padding} does not contain exactly one character. @end deftypefn @cindex strings, trimming -@cindex whitespace, trimming +@cindex white space, trimming @deftypefn {Function} {} LTRIM (@var{string}) -Returns @var{string}, after removing leading spaces. Other whitespace, +Returns @var{string}, after removing leading spaces. Other white space, such as tabs, carriage returns, line feeds, and vertical tabs, is not removed. 
@end deftypefn @@ -644,10 +644,10 @@ or if @var{padding} does not contain exactly one character. @end deftypefn @cindex strings, trimming -@cindex whitespace, trimming +@cindex white space, trimming @deftypefn {Function} {} RTRIM (@var{string}) Returns @var{string}, after removing trailing spaces. Other types of -whitespace are not removed. +white space are not removed. @end deftypefn @deftypefn {Function} {} RTRIM (@var{string}, @var{padding}) diff --git a/doc/language.texi b/doc/language.texi index 81003710..840f3207 100644 --- a/doc/language.texi +++ b/doc/language.texi @@ -29,228 +29,133 @@ Later chapters will describe individual commands in detail. @cindex language, tokens @cindex tokens @cindex lexical analysis -@cindex lexemes PSPP divides most syntax file lines into series of short chunks -called @dfn{tokens}, @dfn{lexical elements}, or @dfn{lexemes}. These -tokens are then grouped to form commands, each of which tells +called @dfn{tokens}. +Tokens are then grouped to form commands, each of which tells PSPP to take some action---read in data, write out data, perform -a statistical procedure, etc. The process of dividing input into tokens -is @dfn{tokenization}, or @dfn{lexical analysis}. Each type of token is +a statistical procedure, etc. Each type of token is described below. -@cindex delimiters -@cindex whitespace -Tokens must be separated from each other by @dfn{delimiters}. -Delimiters include whitespace (spaces, tabs, carriage returns, line -feeds, vertical tabs), punctuation (commas, forward slashes, etc.), and -operators (plus, minus, times, divide, etc.) Note that while whitespace -only separates tokens, other delimiters are tokens in themselves. - @table @strong @cindex identifiers @item Identifiers -Identifiers are names that specify variable names, commands, or command -details. - -@itemize @bullet -@item -The first character in an identifier must be a letter, @samp{#}, or -@samp{@@}. 
Some system identifiers begin with @samp{$}, but -user-defined variables' names may not begin with @samp{$}. - -@item -The remaining characters in the identifier must be letters, digits, or -one of the following special characters: +Identifiers are names that typically specify variables, commands, or +subcommands. The first character in an identifier must be a letter, +@samp{#}, or @samp{@@}. The remaining characters in the identifier +must be letters, digits, or one of the following special characters: @example -. _ $ # @@ +@center @. _ $ # @@ @end example -@item -@cindex variable names -@cindex names, variable -Variable names may be up any length up to 64 bytes long. - - -@item @cindex case-sensitivity -Identifiers are not case-sensitive: @code{foobar}, @code{Foobar}, -@code{FooBar}, @code{FOOBAR}, and @code{FoObaR} are different -representations of the same identifier. +Identifiers may be any length, but only the first 64 bytes are +significant. Identifiers are not case-sensitive: @code{foobar}, +@code{Foobar}, @code{FooBar}, @code{FOOBAR}, and @code{FoObaR} are +different representations of the same identifier. -@item -@cindex keywords -Identifiers other than variable names may be abbreviated to their first -3 characters if this abbreviation is unambiguous. These identifiers are -often called @dfn{keywords}. (Unique abbreviations of 3 or more -characters are also accepted: @samp{FRE}, @samp{FREQ}, and -@samp{FREQUENCIES} are equivalent when the last is a keyword.) - -@item -Whether an identifier is a keyword depends on the context. - -@item -@cindex keywords, reserved -@cindex reserved keywords -Some keywords are reserved. These keywords may not be used in any -context besides those explicitly described in this manual. The reserved -keywords are: +@cindex identifiers, reserved +@cindex reserved identifiers +Some identifiers are reserved. Reserved identifiers may not be used +in any context besides those explicitly described in this manual.
The +reserved identifiers are: @example -ALL AND BY EQ GE GT LE LT NE NOT OR TO WITH +@center ALL AND BY EQ GE GT LE LT NE NOT OR TO WITH @end example -@item -Since keywords are identifiers, all the rules for identifiers apply. -Specifically, they must be delimited as are other identifiers: -@code{WITH} is a reserved keyword, but @code{WITHOUT} is a valid -variable name. -@end itemize +@item Keywords +Keywords are a subclass of identifiers that form a fixed part of +command syntax. For example, command and subcommand names are +keywords. Keywords may be abbreviated to their first 3 characters if +this abbreviation is unambiguous. (Unique abbreviations of 3 or more +characters are also accepted: @samp{FRE}, @samp{FREQ}, and +@samp{FREQUENCIES} are equivalent when the last is a keyword.) -@cindex @samp{.} -@cindex period -@cindex variable names, ending with period -@strong{Caution:} It is legal to end a variable name with a period, but -@emph{don't do it!} The variable name will be misinterpreted when it is -the final token on a line: @code{FOO.} will be divided into two separate -tokens, @samp{FOO} and @samp{.}, the @dfn{terminal dot}. -@xref{Commands, , Forming commands of tokens}. +Reserved identifiers are always used as keywords. Other identifiers +may be used both as keywords and as user-defined identifiers, such as +variable names. @item Numbers @cindex numbers @cindex integers @cindex reals -Numbers may be specified as integers or reals. Integers are internally -converted into reals. Scientific notation is not supported. Here are -some examples of valid numbers: +Numbers are expressed in decimal. A decimal point is optional. +Numbers may be expressed in scientific notation by adding @samp{e} and +a base-10 exponent, so that @samp{1.234e3} has the value 1234. Here +are some more examples of valid numbers: @example -1234 3.14159265359 .707106781185 8945. +-5 3.14159265359 1e100 -.707 8945. 
@end example -@strong{Caution:} The last example will be interpreted as two tokens, -@samp{8945} and @samp{.}, if it is the last token on a line. +Negative numbers are expressed with a @samp{-} prefix. However, in +situations where a literal @samp{-} token is expected, what appears to +be a negative number is treated as @samp{-} followed by a positive +number. + +No white space is allowed within a number token, except for horizontal +white space between @samp{-} and the rest of the number. + +The last example above, @samp{8945.} will be interpreted as two +tokens, @samp{8945} and @samp{.}, if it is the last token on a line. +@xref{Commands, , Forming commands of tokens}. @item Strings @cindex strings @cindex @samp{'} @cindex @samp{"} @cindex case-sensitivity -Strings are literal sequences of characters enclosed in pairs of single -quotes (@samp{'}) or double quotes (@samp{"}). - -@itemize @bullet -@item -Whitespace and case of letters @emph{are} significant inside strings. -@item -Whitespace characters inside a string are not delimiters. -@item -To include single-quote characters in a string, enclose the string in -double quotes. -@item -To include double-quote characters in a string, enclose the string in -single quotes. -@item -It is not possible to put both single- and double-quote characters -inside one string. -@end itemize - -@item Hexstrings -@cindex hexstrings -Hexstrings are string variants that use hex digits to specify -characters. - -@itemize @bullet -@item -A hexstring may be used anywhere that an ordinary string is allowed. - -@item -@cindex @samp{X'} -@cindex @samp{'} -A hexstring begins with @samp{X'} or @samp{x'}, and ends with @samp{'}. - -@cindex whitespace -@item -No whitespace is allowed between the initial @samp{X} and @samp{'}. - -@item -Double quotes @samp{"} may be used in place of single quotes @samp{'} if -done in both places. - -@item -Each pair of hex digits is internally changed into a single character -with the given value. 
- -@item -If there is an odd number of hex digits, the missing last digit is -assumed to be @samp{0}. - -@item -@cindex portability -@strong{Please note:} Use of hexstrings is nonportable because the same -numeric values are associated with different glyphs by different -operating systems. Therefore, their use should be confined to syntax -files that will not be widely distributed. +Strings are literal sequences of characters enclosed in pairs of +single quotes (@samp{'}) or double quotes (@samp{"}). To include the +character used for quoting in the string, double it, e.g.@: +@samp{'it''s an apostrophe'}. White space and case of letters are +significant inside strings. + +Strings can be concatenated using @samp{+}, so that @samp{"a" + 'b' + +'c'} is equivalent to @samp{'abc'}. Concatenation is useful for +splitting a single string across multiple source lines. The maximum +length of a string, after concatenation, is 255 characters. + +Strings may also be expressed as hexadecimal, octal, or binary +character values by prefixing the initial quote character by @samp{X}, +@samp{O}, or @samp{B} or their lowercase equivalents. Each pair, +triplet, or octet of characters, according to the radix, is +transformed into a single character with the given value. If there is +an incomplete group of characters, the missing final digits are +assumed to be @samp{0}. These forms of strings are nonportable +because numeric values are associated with different characters by +different operating systems. Therefore, their use should be confined +to syntax files that will not be widely distributed. -@item @cindex characters, reserved @cindex 0 -@cindex whitespace -@strong{Please note also:} The character with value 00 is reserved for +@cindex white space +The character with value 00 is reserved for internal use by PSPP. Its use in strings causes an error and -replacement with a blank space (in ASCII, hex 20, decimal 32). 
-@end itemize - -@item Punctuation -@cindex punctuation -Punctuation separates tokens; punctuators are delimiters. These are the -punctuation characters: +replacement by a space character. -@example -, / = ( ) -@end example - -@item Operators +@item Punctuators and Operators +@cindex punctuators @cindex operators -Operators describe mathematical operations. Some operators are delimiters: +These tokens are the punctuators and operators: @example -( ) + - * / ** +@center , / = ( ) + - * / ** < <= <> > >= ~= & | . @end example -Many of the above operators are also punctuators. Punctuators are -distinguished from operators by context. - -The other operators are all reserved keywords. None of these are -delimiters: - -@example -AND EQ GE GT LE LT NE OR -@end example - -@item Terminal Dot -@cindex terminal dot -@cindex dot, terminal -@cindex period -@cindex @samp{.} -A period (@samp{.}) at the end of a line (except for whitespace) is one -type of a @dfn{terminal dot}, although not every terminal dot is a -period at the end of a line. @xref{Commands, , Forming commands of -tokens}. A period is a terminal dot @emph{only} -when it is at the end of a line; otherwise it is part of a -floating-point number. (A period outside a number in the middle of a -line is an error.) - -@quotation -@cindex terminal dot, changing -@cindex dot, terminal, changing -@strong{Please note:} The character used for the @dfn{terminal dot} -can be changed with @cmd{SET}'s ENDCMD subcommand (@pxref{SET}). This -is strongly discouraged, and throughout all the remainder of this -manual it will be assumed that the default setting is in effect. -@end quotation - +Most of these appear within the syntax of commands, but the period +(@samp{.}) punctuator is used only at the end of a command. It is a +punctuator only as the last character on a line (except white space). 
+When it is the last non-space character on a line, a period is not +treated as part of another token, even if it would otherwise be part +of e.g.@: an identifier or a floating-point number. + +Actually, the character that ends a command can be changed with +@cmd{SET}'s ENDCMD subcommand (@pxref{SET}), but we do not recommend +doing so. Throughout the remainder of this manual we will assume that +the default setting is in effect. @end table @node Commands, Types of Commands, Tokens, Language @@ -260,92 +165,41 @@ manual it will be assumed that the default setting is in effect. @cindex language, command structure @cindex commands, structure -Most PSPP commands share a common structure, diagrammed below: - -@example -@var{cmd}@dots{} [@var{sbc}[=][@var{spec} [[,]@var{spec}]@dots{}]] [[/[=][@var{spec} [[,]@var{spec}]@dots{}]]@dots{}]. -@end example - -@cindex @samp{[ ]} -In the above, rather daunting, expression, pairs of square brackets -(@samp{[ ]}) indicate optional elements, and names such as @var{cmd} -indicate parts of the syntax that vary from command to command. -Ellipses (@samp{...}) indicate that the preceding part may be repeated -an arbitrary number of times. Let's pick apart what it says above: - -@itemize @bullet -@cindex commands, names -@item -A command begins with a command name of one or more keywords, such as -@cmd{FREQUENCIES}, @cmd{DATA LIST}, or @cmd{N OF CASES}. @var{cmd} -may be abbreviated to its first word if that is unambiguous; each word -in @var{cmd} may be abbreviated to a unique prefix of three or more -characters as described above. - -@cindex subcommands -@item -The command name may be followed by one or more @dfn{subcommands}: - -@itemize @minus -@item -Each subcommand begins with a unique keyword, indicated by @var{sbc} -above. This is analogous to the command name. - -@item -The subcommand name is optionally followed by an equals sign (@samp{=}). 
- -@item -Some subcommands accept a series of one or more specifications -(@var{spec}), optionally separated by commas. - -@item -Each subcommand must be separated from the next (if any) by a forward -slash (@samp{/}). -@end itemize - -@cindex dot, terminal -@cindex terminal dot -@item -Each command must be terminated with a @dfn{terminal dot}. -The terminal dot may be given one of three ways: - -@itemize @minus -@item -(most commonly) A period character at the very end of a line, as -described above. - -@item -(only if NULLINE is on: @xref{SET, , Setting user preferences}, for more -details.) A completely blank line. - -@item -(in batch mode only) Any line that is not indented from the left side of -the page causes a terminal dot to be inserted before that line. -Therefore, each command begins with a line that is flush left, followed -by zero or more lines that are indented one or more characters from the -left margin. - -In batch mode, PSPP will ignore a plus sign, minus sign, or period -(@samp{+}, @samp{@minus{}}, or @samp{.}) as the first character in a -line. Any of these characters as the first character on a line will -begin a new command. This allows for visual indentation of a command -without that command being considered part of the previous command. - -PSPP is in batch mode when it is reading input from a file, rather -than from an interactive user. Note that the other forms of the -terminal dot may also be used in batch mode. +Most PSPP commands share a common structure. A command begins with a +command name, such as @cmd{FREQUENCIES}, @cmd{DATA LIST}, or @cmd{N OF +CASES}. The command name may be abbreviated to its first word, and +each word in the command name may be abbreviated to its first three +or more characters, where these abbreviations are unambiguous. + +The command name may be followed by one or more @dfn{subcommands}. +Each subcommand begins with a subcommand name, which may be +abbreviated to its first three letters. 
Some subcommands accept a +series of one or more specifications, which follow the subcommand name, +optionally separated from it by an equals sign (@samp{=}), and +optionally separated from each other by commas. Each subcommand must +be separated from the next (if any) by a forward slash (@samp{/}). + +There are multiple ways to mark the end of a command. The most common +way is to end the last line of the command with a period (@samp{.}) as +described in the previous section (@pxref{Tokens}). A blank line, or +one that consists only of white space or comments, also ends a command +by default, although you can use the NULLINE subcommand of @cmd{SET} +to disable this feature (@pxref{SET}). + +In batch mode only, that is, when reading commands from a file instead +of from an interactive user, any line that contains a non-space character +in the leftmost column begins a new command. Thus, each command +consists of a flush-left line followed by any number of lines indented +from the left margin. In this mode, a plus sign, minus sign, or +period (@samp{+}, @samp{@minus{}}, or @samp{.}) as the first character +in a line is ignored and causes that line to begin a new command, +which allows for visual indentation of a command without that command +being considered part of the previous command. Sometimes, one encounters syntax files that are intended to be -interpreted in interactive mode rather than batch mode (for instance, -this can happen if a session log file is used directly as a syntax -file). When this occurs, use the @samp{-i} command line option to force -interpretation in interactive mode (@pxref{Language control options}). -@end itemize -@end itemize - -PSPP ignores empty commands when they are generated by the above -rules. Note that, as a consequence of these rules, each command must -begin on a new line. +interpreted in interactive mode rather than batch mode.
When this +occurs, use the @samp{-i} command line option to force interpretation +in interactive mode (@pxref{Language control options}). @node Types of Commands, Order of Commands, Commands, Language @section Types of Commands @@ -379,8 +233,8 @@ are not carried out until a procedure is executed. @item Restricted transformations @cindex restricted transformations -Same as transformations for most purposes. @xref{Order of Commands}, for a -detailed description of the differences. +Transformations that cannot appear in certain contexts. @xref{Order +of Commands}, for details. @item Procedures @cindex procedures @@ -395,11 +249,11 @@ active file (the data) to be read. @cindex commands, ordering @cindex order of commands -PSPP does not place many restrictions on ordering of commands. -The main restriction is that variables must be defined with one of the -file-definition commands before they are otherwise referred to. +PSPP does not place many restrictions on ordering of commands. The +main restriction is that variables must be defined before they are otherwise +referenced. This section describes the details of command ordering, +but most users will have no need to refer to them. -Of course, there are specific rules, for those who are interested. PSPP possesses five internal states, called initial, INPUT PROGRAM, FILE TYPE, transformation, and procedure states. (Please note the distinction between the @cmd{INPUT PROGRAM} and @cmd{FILE TYPE} @@ -413,7 +267,7 @@ own rules for state transitions: @item Utility commands @itemize @bullet @item -Legal in all states. +Valid in any state. @item Do not cause state transitions. Exception: when @cmd{N OF CASES} is executed in the procedure state, it causes a transition to the @@ -423,7 +277,7 @@ transformation state. @item @cmd{DATA LIST} @itemize @bullet @item -Legal in all states. +Valid in any state. @item When executed in the initial or procedure state, causes a transition to the transformation state.
@@ -496,10 +350,11 @@ Cause a transition to the procedure state. PSPP includes special support for unknown numeric data values. Missing observations are assigned a special value, called the @dfn{system-missing value}. This ``value'' actually indicates the -absence of value; it means that the actual value is unknown. Procedures +absence of a value; it means that the actual value is unknown. Procedures automatically exclude from analyses those observations or cases that -have missing values. Whether single observations or entire cases are -excluded depends on the procedure. +have missing values. Details of missing value exclusion depend on the +procedure and can often be controlled by the user; refer to +descriptions of individual procedures for details. The system-missing value exists only for numeric variables. String variables always have a defined value, even if it is only a string of @@ -543,9 +398,29 @@ Each variable has a number of attributes, including: @table @strong @item Name -This is an identifier. Each variable must have a different name. +An identifier, up to 64 bytes long. Each variable must have a different name. @xref{Tokens}. +Some system variable names begin with @samp{$}, but user-defined +variables' names may not begin with @samp{$}. + +@cindex @samp{.} +@cindex period +@cindex variable names, ending with period +The final character in a variable name should not be @samp{.}, because +such an identifier will be misinterpreted when it is the final token +on a line: @code{FOO.} will be divided into two separate tokens, +@samp{FOO} and @samp{.}, indicating end-of-command. @xref{Tokens}. + +@cindex @samp{_} +The final character in a variable name should not be @samp{_}, because +some such identifiers are used for special purposes by PSPP +procedures. + +As with all PSPP identifiers, variable names are not case-sensitive. +PSPP capitalizes variable names on output the same way they were +capitalized at their point of definition in the input. 
+ @cindex variables, type @cindex type of variables @item Type @@ -559,11 +434,9 @@ fewer are called @dfn{short string variables}. Short string variables can be used in many procedures where @dfn{long string variables} (those with widths greater than 8) are not allowed. -@quotation -@strong{Please note:} Certain systems may consider strings longer than 8 +Certain systems may consider strings longer than 8 characters to be short strings. Eight characters represents a minimum figure for the maximum length of a short string. -@end quotation @item Position Variables in the dictionary are arranged in a specific order. @@ -657,81 +530,30 @@ Page width, in characters, in format F3. @cindex TO convention @cindex convention, TO -There are several ways to specify a set of variables: - -@enumerate -@item -(Most commonly.) List the variable names one after another, optionally -separating them by commas. - -@cindex @code{TO} -@item -(This method cannot be used on commands that define the dictionary, such -as @cmd{DATA LIST}.) The syntax is the names of two existing variables, -separated by the reserved keyword @code{TO}. The meaning is to include -every variable in the dictionary between and including the variables -specified. For instance, if the dictionary contains six variables with -the names @code{ID}, @code{X1}, @code{X2}, @code{GOAL}, @code{MET}, and +To refer to a set of variables, list their names one after another. +Optionally, their names may be separated by commas. To include a +range of variables from the dictionary in the list, write the name of +the first and last variable in the range, separated by @code{TO}. For +instance, if the dictionary contains six variables with the names +@code{ID}, @code{X1}, @code{X2}, @code{GOAL}, @code{MET}, and @code{NEXTGOAL}, in that order, then @code{X2 TO MET} would include variables @code{X2}, @code{GOAL}, and @code{MET}. -@item -(This method can be used only on commands that define the dictionary, -such as @cmd{DATA LIST}.) 
It is used to define sequences of variables -that end in consecutive integers. The syntax is two identifiers that -end in numbers. This method is best illustrated with examples: - -@itemize @bullet -@item -The syntax @code{X1 TO X5} defines 5 variables: - -@itemize @minus -@item -X1 -@item -X2 -@item -X3 -@item -X4 -@item -X5 -@end itemize - -@item -The syntax @code{ITEM0008 TO ITEM0013} defines 6 variables: - -@itemize @minus -@item -ITEM0008 -@item -ITEM0009 -@item -ITEM0010 -@item -ITEM0011 -@item -ITEM0012 -@item -ITEM0013 -@end itemize - -@item -Each of the syntaxes @code{QUES001 TO QUES9} and @code{QUES6 TO QUES3} -are invalid, although for different reasons, which should be evident. -@end itemize - -Note that after a set of variables has been defined with @cmd{DATA LIST} -or another command with this method, the same set can be referenced on +Commands that define variables, such as @cmd{DATA LIST}, give +@code{TO} an alternate meaning. With these commands, @code{TO} defines +sequences of variables whose names end in consecutive integers. The +syntax is two identifiers that begin with the same root and end with +numbers, separated by @code{TO}. The syntax @code{X1 TO X5} defines 5 +variables, named @code{X1}, @code{X2}, @code{X3}, @code{X4}, and +@code{X5}. The syntax @code{ITEM0008 TO ITEM0013} defines 6 +variables, named @code{ITEM0008}, @code{ITEM0009}, @code{ITEM0010}, +@code{ITEM0011}, @code{ITEM0012}, and @code{ITEM0013}. The syntaxes +@code{QUES001 TO QUES9} and @code{QUES6 TO QUES3} are invalid. + +After a set of variables has been defined with @cmd{DATA LIST} or +another command with this method, the same set can be referenced on later commands using the same syntax. -@item -The above methods can be combined, either one after another or delimited -by commas. For instance, the combined syntax @code{A Q5 TO Q8 X TO Z} -is legal as long as each part @code{A}, @code{Q5 TO Q8}, @code{X TO Z} -is individually legal.
-@end enumerate - @node Input/Output Formats, Scratch Variables, Sets of Variables, Variables @subsection Input and Output Formats @@ -889,7 +711,7 @@ definitions of the elements that make up their formats will be helpful: @table @dfn @item leader -All formats accept an optional whitespace leader. +All formats accept an optional white space leader. @item day An integer between 1 and 31 representing the day of month. @@ -898,7 +720,7 @@ An integer between 1 and 31 representing the day of month. An integer representing a number of days. @item date-delimiter -One or more characters of whitespace or the following characters: +One or more characters of white space or the following characters: @code{- / . ,} @item month @@ -934,7 +756,7 @@ An integer between 1 and 53 representing a week within a year. The letters @samp{wk} in any case. @item time-delimiter -At least one characters of whitespace or @samp{:} or @samp{.}. +At least one characters of white space or @samp{:} or @samp{.}. @item hour An integer greater than 0 representing an hour. @@ -953,13 +775,13 @@ An integer between 0 and 23 representing an hour within a day. At least the first two characters of an English day word. @item spaces -Any amount or no amount of whitespace. +Any amount or no amount of white space. @item sign An optional positive or negative sign. @item trailer -All formats accept an optional whitespace trailer. +All formats accept an optional white space trailer. @end table The date input formats are strung together from the above pieces. On @@ -1162,11 +984,9 @@ A single variable name. Operators and punctuators. @cindex @code{.} -@cindex terminal dot -@cindex dot, terminal @item @code{.} -The terminal dot. This is not necessarily an actual dot in the syntax -file: @xref{Commands}, for more details. +The end of the command. This is not necessarily an actual dot in the +syntax file: @xref{Commands}, for more details. 
@end table @item diff --git a/doc/q2c.texi b/doc/q2c.texi index 0e25019b..3bb3ade4 100644 --- a/doc/q2c.texi +++ b/doc/q2c.texi @@ -104,7 +104,7 @@ backslash within a string. @item Special character -Other characters, other than whitespace, constitute tokens in +Other characters, other than white space, constitute tokens in themselves. @end table diff --git a/doc/transformation.texi b/doc/transformation.texi index d94c1366..e722b9c4 100644 --- a/doc/transformation.texi +++ b/doc/transformation.texi @@ -386,7 +386,8 @@ specified are discarded. If the VARIABLES subcommand is omitted, all variables are selected for transposition. The variables specified by NEWNAMES, which must be a string variable, is -used to give names to the variables created by @cmd{FLIP}. If +used to give names to the variables created by @cmd{FLIP}. Only the +first 8 characters of the variable's value are used. If NEWNAMES is not specified then the default is a variable named CASE_LBL, if it exists. If it does not then the variables created by FLIP are named VAR000 @@ -400,11 +401,12 @@ extensions are added, starting with 1, until a unique name is found or there are no remaining possibilities. If the latter occurs then the FLIP operation aborts. -The resultant dictionary contains a CASE_LBL variable, which stores the -names of the variables in the dictionary before the transposition. If -the active file is subsequently transposed using @cmd{FLIP}, this -variable can -be used to recreate the original variable names. +The resultant dictionary contains a CASE_LBL variable, a string +variable of width 8, which stores the names of the variables in the +dictionary before the transposition. Variable names longer than 8 +characters are truncated. If the active file is subsequently +transposed using @cmd{FLIP}, this variable can be used to recreate the +original variable names. FLIP honors @cmd{N OF CASES} (@pxref{N OF CASES}). 
It ignores @cmd{TEMPORARY} (@pxref{TEMPORARY}), so that ``temporary'' diff --git a/doc/utilities.texi b/doc/utilities.texi index 67035315..ea1cfbb2 100644 --- a/doc/utilities.texi +++ b/doc/utilities.texi @@ -352,7 +352,7 @@ files. The data input subcommands are @table @asis @item BLANKS This is the value assigned to an item data item that is empty or -contains only whitespace. An argument of SYSMIS or '.' will cause the +contains only white space. An argument of SYSMIS or '.' will cause the system-missing value to be assigned to null items. This is the default. Any real value may be assigned. diff --git a/src/ChangeLog b/src/ChangeLog index 897f5e3b..33134d8a 100644 --- a/src/ChangeLog +++ b/src/ChangeLog @@ -1,3 +1,204 @@ +Sun May 1 23:00:19 2005 Ben Pfaff + + * var-display.c: (cmd_variable_alignment) Fix memory leak. + (cmd_variable_level) Ditto. + +Sun May 1 22:49:04 2005 Ben Pfaff + + Hash table had buggy deletion function. The fix required changing + other functions to do probing in the required order. + + * hash.c: (locate_matching_entry) Rewrite and change interface. + (hsh_rehash) Rewrite to use locate_matching_entry(). + (hsh_probe) Ditto. + (hsh_find) Ditto. + (hsh_delete) Ditto. Also, fix stupid bugs. + +Sun May 1 22:24:58 2005 Ben Pfaff + + * dictionary.c: (dict_clone) Properly copy vectors. + +Sun May 1 22:07:58 2005 Ben Pfaff + + New implementation of long variable names. Each variable has a + "normal" name, which may be up to 64 bytes long and which is used + for all normal operations. Variables may have a "short" name, + which is limited to 8 bytes and used only for system and portable + file input and output. + + Make tokid case-preserving. Update most uses of tokid to treat it + case-insensitively. + + Update many commands to deal with long variable names. + + * autorecode.c: (cmd_autorecode) Use strcasecmp() instead of strcmp(). + + * command.c: (cmd_parse) Ditto. + (match_strings) Use toupper() before comparing characters. 
+ (conflicting_3char_prefixes) Use mm_case_compare() instead of + memcmp(). + (cmd_match_words) Ditto. + + * compute.c: (lvalue_parse) Use st_trim_copy() instead of + strncpy(). + + * count.c: (struct cnt_var_info) Change n[] to fit long var name. + Use st_trim_copy() instead of strcpy(). + + * data-in.c: (parse_enum) Use mm_case_compare() instead of + memcmp(). + + * data-list.c: (struct dls_var_spec) Change name[] to fit long var + name. + (parse_free) Use st_trim_copy() instead of strcpy(). + + * descript.c: (struct dsc_var) Change z_name[] to fit long var + name. + (try_name) Use strcasecmp() instead of strcmp(). + (generate_z_varname) Use st_trim_copy() instead of strcpy(). + (descriptives_compare_dsc_vars) Use strcasecmp() instead of + strcmp(). + + * dictionary.c: (struct dictionary) Removed `long_name_tab' + member. + (compare_long_names) Removed. + (hash_long_name) Removed. + (dict_create) Don't initialize `long_name_tab' member. + (dict_clone) Copy short names into new dictionary. + (dict_clear) Don't clear `long_name_tab' member. + (dict_get_varname_block) Removed. + (dict_add_longvar_entry) Removed. + (free_nte) Removed. + (dict_destroy) Don't destroy `long_name_tab' member. + (dict_create_var_from_short) Removed. + (dict_create_var_x) Removed. + (dict_create_var) Get rid of longname handling. + Clear short name. + (dict_clone_var) Get rid of longname parameter and longname + handling. + (dict_lookup_var) Get rid of longname handling. + (dict_reorder_var) New function. + (dict_rename_var) Clear short name. + (dict_rename_vars) Get rid of longname handling. Clear short + names. + (dict_create_vector) Support long vector names. + (dict_lookup_vector) Use strcasecmp() instead of strcmp(). + (quasi_base27) Removed. + (make_short_name) Removed. + (compare_strings) New function. + (hash_string) New function. + (dict_assign_short_names) New function. + + * file-handle.q: (get_handle_with_name) Use strcasecmp() instead + of strcmp(). 
+ (get_handle_for_filename) Support long handle names. + + * file-type.c: (struct col_spec) Make `name' fit long var names. + (cmd_file_type) Use strcasecmp() instead of strcmp(). + + * flip.c: (make_new_var) Rewrite. + (flip_sink_write) Use st_trim_copy() instead of strncpy(). + + * format.c: (parse_format_specifier_name) Use mm_case_compare() + instead of memcmp(). + + * get.c: (cmd_save_internal) Rephrase. + (rename_variables) Drop test for identical variable name. + (struct mtf_proc) Change `first', `last' to fit long var name. + + * hash.c: (hsh_hash_case_string) New function for case-insensitive + string hashing. + + * lexer.c: (restore_token) Use st_trim_copy() instead of + strncpy(). + (lex_get) Don't uppercase string when copying into tokid. + (lex_put_back_id) Use st_trim_copy() instead of + strncpy(). + + * list.q: (determine_layout) Consider length of variable names in + choosing vertical layout. + + * matrix-data.c: (cmd_matrix_data) Use strcasecmp() instead of + strcmp(). + (string_to_content_type) Ditto. + + * modify-vars.c: (compare_variables_given_ordering) Ditto. + (struct var_renaming) Change `new_name' to fit long var name. + (compare_var_renaming_by_new_name) Use strcasecmp() instead of + strcmp(). + + * pfm-read.c: (read_variables) Disallow system variables in system + files. + (write_variables) Call dict_assign_short_names() and use + short_name[] members. + + * repeat.c: (internal_cmd_do_repeat) Use strcasecmp() instead of + strcmp(). + + * sfm-read.c: (sfm_open_reader) Rewrite code for long variable + map. Reorder variables into same order as long variable map. + (read_variables) Set short name. + + * sfm-write.c: (sfm_open_writer) Call dict_assign_short_names(). + (write_variable) Use st_bare_pad_copy(). + (write_longvar_table) Rewrite. + + * str.c: (mm_case_compare) New function. + + * sysfile-info.c: (compare_vectors_by_name) Use strcasecmp() + instead of strcmp(). + + * t-test.q: (tts_custom_groups) Remove redundant test. 
+ (tts_custom_pairs) Ditto. + + * var.h: (struct variable) Change `name' to fit long var names. + Remove `longname'. Add `short_name' member. Reorder some + variables. + (struct name_table_entry) Removed. + (struct vector) Change `name' to fit long vector names. + + * vars-atr.c: (var_is_valid_name) Allow long var names. + (compare_var_names) Use strcasecmp() instead of strcmp(). + (compare_var_ptr_names) Ditto. + (hash_var_name) Use hsh_hash_case_string(). + (hash_var_ptr_name) Ditto. + (var_set_short_name) New function. + (var_clear_short_name) New function. + (var_set_short_name_suffix) New function. + + * vars-prs.c: (parse_DATA_LIST_vars) Support long names. + Use strcasecmp() instead of strcmp(). + (struct array_var_set) Removed `longname_tab'. + (array_var_set_lookup_var_idx) Drop longname_tab support. + (array_var_set_destroy) Don't destroy `longname_tab'. + (var_set_create_from_array) Don't create `longname_tab'. + + * vector.c: (cmd_vector) Use strcasecmp() instead of strcmp(). + Support long names. + + * expressions/parse.c: (word_matches) Use mm_case_compare() + instead of memcmp(). + (compare_strings) New function. + (lookup_function) Use compare_strings() instead of strcmp(). + +Sun May 1 22:07:43 2005 Ben Pfaff + + * algorithm.c: (move_element) New function. + +Sun May 1 22:05:35 2005 Ben Pfaff + + * aggregate.c: (parse_aggregate_functions) Always initialize + destvar. + +Sun May 1 22:03:47 2005 Ben Pfaff + + * aggregate.c: (cmd_aggregate) Use dict_clone_var_assert(). + + * dictionary.c: (dict_clone) Ditto. + (dict_clone_var_assert) New function. + + * get.c: (mtf_merge_dictionary) Use dict_clone_var_assert(). 
+ Sun May 1 15:05:54 WST 2005 John Darrington * error.c: Added a string for the compiler version to the diff --git a/src/aggregate.c b/src/aggregate.c index b7b6836e..154256f5 100644 --- a/src/aggregate.c +++ b/src/aggregate.c @@ -213,13 +213,8 @@ cmd_aggregate (void) goto error; for (i = 0; i < agr.break_var_cnt; i++) - { - struct variable *v = dict_clone_var (agr.dict, agr.break_vars[i], - agr.break_vars[i]->name, - agr.break_vars[i]->longname - ); - assert (v != NULL); - } + dict_clone_var_assert (agr.dict, agr.break_vars[i], + agr.break_vars[i]->name); /* BREAK must follow the options. */ break; @@ -455,7 +450,8 @@ parse_aggregate_functions (struct agr_proc *agr) arg[i].f = tokval; type = NUMERIC; } else { - msg (SE, _("Missing argument %d to %s."), i + 1, function->name); + msg (SE, _("Missing argument %d to %s."), i + 1, + function->name); goto error; } @@ -543,10 +539,11 @@ parse_aggregate_functions (struct agr_proc *agr) } if (function->alpha_type == ALPHA) - destvar = dict_clone_var (agr->dict, v->src, 0, dest[i] ); - else if (v->src->type == NUMERIC - || function->alpha_type == NUMERIC) + destvar = dict_clone_var (agr->dict, v->src, dest[i]); + else { + assert (v->src->type == NUMERIC + || function->alpha_type == NUMERIC); destvar = dict_create_var (agr->dict, dest[i], 0); if (destvar != NULL) { diff --git a/src/algorithm.c b/src/algorithm.c index 209b0b0f..ca427a0d 100644 --- a/src/algorithm.c +++ b/src/algorithm.c @@ -393,6 +393,36 @@ remove_element (void *array, size_t count, size_t size, remove_range (array, count, size, idx, 1); } +/* Moves an element in ARRAY, which consists of COUNT elements of + SIZE bytes each, from OLD_IDX to NEW_IDX, shifting around + other elements as needed. Runs in O(abs(OLD_IDX - NEW_IDX)) + time. 
*/ +void +move_element (void *array_, size_t count, size_t size, + size_t old_idx, size_t new_idx) +{ + assert (array_ != NULL || count == 0); + assert (old_idx < count); + assert (new_idx < count); + + if (old_idx != new_idx) + { + char *array = array_; + char *element = xmalloc (size); + char *new = array + new_idx * size; + char *old = array + old_idx * size; + + memcpy (element, old, size); + if (new < old) + memmove (new + size, new, (old_idx - new_idx) * size); + else + memmove (old, old + size, (new_idx - old_idx) * size); + memcpy (new, element, size); + + free (element); + } +} + /* A predicate and its auxiliary data. */ struct pred_aux { diff --git a/src/algorithm.h b/src/algorithm.h index 5482de15..10e589a1 100644 --- a/src/algorithm.h +++ b/src/algorithm.h @@ -107,6 +107,13 @@ void remove_range (void *array, size_t count, size_t size, void remove_element (void *array, size_t count, size_t size, size_t idx); +/* Moves an element in ARRAY, which consists of COUNT elements of + SIZE bytes each, from OLD_IDX to NEW_IDX, shifting around + other elements as needed. Runs in O(abs(OLD_IDX - NEW_IDX)) + time. */ +void move_element (void *array, size_t count, size_t size, + size_t old_idx, size_t new_idx); + /* Removes elements equal to ELEMENT from ARRAY, which consists of COUNT elements of SIZE bytes each. Returns the number of remaining elements. 
AUX is passed to COMPARE as auxiliary diff --git a/src/autorecode.c b/src/autorecode.c index a7f4febc..0b6857dd 100644 --- a/src/autorecode.c +++ b/src/autorecode.c @@ -151,7 +151,7 @@ cmd_autorecode (void) goto lossage; } for (j = 0; j < i; j++) - if (!strcmp (arc.dst_names[i], arc.dst_names[j])) + if (!strcasecmp (arc.dst_names[i], arc.dst_names[j])) { msg (SE, _("Duplicate variable name %s among target variables."), arc.dst_names[i]); diff --git a/src/command.c b/src/command.c index eb5b1654..95151fd5 100644 --- a/src/command.c +++ b/src/command.c @@ -183,7 +183,7 @@ cmd_parse (void) return CMD_SUCCESS; /* Parse comments. */ - if ((token == T_ID && !strcmp (tokid, "COMMENT")) + if ((token == T_ID && !strcasecmp (tokid, "COMMENT")) || token == T_EXP || token == '*' || token == '[') { lex_skip_comment (); @@ -281,7 +281,7 @@ match_strings (const char *a, size_t a_len, while (a_len > 0 && b_len > 0) { /* Mismatch always returns zero. */ - if (*a++ != *b++) + if (toupper ((unsigned char) *a++) != toupper ((unsigned char) *b++)) return 0; /* Advance. */ @@ -341,14 +341,14 @@ conflicting_3char_prefixes (const char *a, const char *b) assert (aw != NULL && bw != NULL); /* Words that are the same don't conflict. */ - if (aw_len == bw_len && !memcmp (aw, bw, aw_len)) + if (aw_len == bw_len && !mm_case_compare (aw, bw, aw_len)) return 0; /* Words that are otherwise the same in the first three letters do conflict. 
*/ return ((aw_len > 3 && bw_len > 3) || (aw_len == 3 && bw_len > 3) - || (bw_len == 3 && aw_len > 3)) && !memcmp (aw, bw, 3); + || (bw_len == 3 && aw_len > 3)) && !mm_case_compare (aw, bw, 3); } /* Returns nonzero if CMD can be confused with another command @@ -390,7 +390,7 @@ cmd_match_words (const struct command *cmd, word != NULL && word_idx < word_cnt; word = find_word (word + word_len, &word_len), word_idx++) if (word_len != strlen (words[word_idx]) - || memcmp (word, words[word_idx], word_len)) + || mm_case_compare (word, words[word_idx], word_len)) { size_t match_chars = match_strings (word, word_len, words[word_idx], diff --git a/src/compute.c b/src/compute.c index 945412e7..24148fe6 100644 --- a/src/compute.c +++ b/src/compute.c @@ -315,7 +315,7 @@ struct lvalue struct expression *element; /* Destination vector element, or NULL. */ }; -/* Parses the target variable or vector elector into a new +/* Parses the target variable or vector element into a new `struct lvalue', which is returned. */ static struct lvalue * lvalue_parse (void) @@ -353,8 +353,7 @@ lvalue_parse (void) else { /* Variable name. */ - strncpy (lvalue->var_name, tokid, LONG_NAME_LEN); - lvalue->var_name[LONG_NAME_LEN] = '\0'; + st_trim_copy (lvalue->var_name, tokid, sizeof lvalue->var_name); lex_get (); } return lvalue; @@ -371,8 +370,7 @@ lvalue_get_type (const struct lvalue *lvalue) { if (lvalue->vector == NULL) { - struct variable *var - = dict_lookup_var (default_dict, lvalue->var_name); + struct variable *var = dict_lookup_var (default_dict, lvalue->var_name); if (var == NULL) return NUMERIC; else diff --git a/src/count.c b/src/count.c index 1697c0fb..82085d6f 100644 --- a/src/count.c +++ b/src/count.c @@ -108,7 +108,7 @@ struct cnt_var_info struct cnt_var_info *next; struct variable *d; /* Destination variable. */ - char n[SHORT_NAME_LEN + 1]; /* Name of dest var. */ + char n[LONG_NAME_LEN + 1]; /* Name of dest var. */ struct counting *c; /* The counting specifications. 
*/ }; @@ -145,7 +145,7 @@ cmd_count (void) cnt->d = NULL; cnt->c = NULL; - /* Get destination struct variable, or at least its name. */ + /* Get destination variable, or at least its name. */ if (!lex_force_id ()) goto fail; cnt->d = dict_lookup_var (default_dict, tokid); @@ -158,7 +158,7 @@ cmd_count (void) } } else - strcpy (cnt->n, tokid); + st_trim_copy (cnt->n, tokid, sizeof cnt->n); lex_get (); if (!lex_force_match ('=')) diff --git a/src/data-in.c b/src/data-in.c index f7224cc0..8f36dcb0 100644 --- a/src/data-in.c +++ b/src/data-in.c @@ -760,7 +760,7 @@ parse_enum (struct data_in *i, const char *what, if ((ep->can_abbreviate && lex_id_match_len (ep->name, strlen (ep->name), name, length)) || (!ep->can_abbreviate && length == strlen (ep->name) - && !memcmp (name, ep->name, length))) + && !mm_case_compare (name, ep->name, length))) { *output = ep->value; return true; diff --git a/src/data-list.c b/src/data-list.c index 24fa0dbc..3d730939 100644 --- a/src/data-list.c +++ b/src/data-list.c @@ -65,7 +65,7 @@ struct dls_var_spec int fc, lc; /* Column numbers in record. */ /* Free format only. */ - char name[SHORT_NAME_LEN + 1]; /* Name of variable. */ + char name[LONG_NAME_LEN + 1]; /* Name of variable. */ }; /* Constants for DATA LIST type. 
*/ @@ -383,7 +383,7 @@ parse_fixed (struct data_list_pgm *dls) else { msg (SE, _("SPSS-like or FORTRAN-like format " - "specification expected after variable names.")); + "specification expected after variable names.")); goto fail; } @@ -889,7 +889,7 @@ parse_free (struct dls_var_spec **first, struct dls_var_spec **last) spec->input = input; spec->v = v; spec->fv = v->fv; - strcpy (spec->name, v->name); + st_trim_copy (spec->name, v->name, sizeof spec->name); append_var_spec (first, last, spec); } for (i = 0; i < name_cnt; i++) diff --git a/src/descript.c b/src/descript.c index e0717993..57782c57 100644 --- a/src/descript.c +++ b/src/descript.c @@ -120,7 +120,7 @@ static const struct dsc_statistic_info dsc_info[DSC_N_STATS] = struct dsc_var { struct variable *v; /* Variable to calculate on. */ - char z_name[SHORT_NAME_LEN + 1];/* Name for z-score variable. */ + char z_name[LONG_NAME_LEN + 1]; /* Name for z-score variable. */ double valid, missing; /* Valid, missing counts. */ struct moments *moments; /* Moments. */ double min, max; /* Maximum and mimimum values. */ @@ -465,7 +465,7 @@ try_name (struct dsc_proc *dsc, char *name) if (dict_lookup_var (default_dict, name) != NULL) return 0; for (i = 0; i < dsc->var_cnt; i++) - if (!strcmp (dsc->vars[i].z_name, name)) + if (!strcasecmp (dsc->vars[i].z_name, name)) return 0; return 1; } @@ -478,12 +478,11 @@ static int generate_z_varname (struct dsc_proc *dsc, char *z_name, const char *var_name, int *z_cnt) { - char name[10]; + char name[LONG_NAME_LEN + 1]; /* Try a name based on the original variable name. 
*/ name[0] = 'Z'; - strcpy (name + 1, var_name); - name[SHORT_NAME_LEN] = '\0'; + st_trim_copy (name + 1, var_name, sizeof name - 1); if (try_name (dsc, name)) { strcpy (z_name, name); @@ -925,7 +924,7 @@ descriptives_compare_dsc_vars (const void *a_, const void *b_, void *dsc_) int result; if (dsc->sort_by_stat == DSC_NAME) - result = strcmp (a->v->name, b->v->name); + result = strcasecmp (a->v->name, b->v->name); else { double as = a->stats[dsc->sort_by_stat]; diff --git a/src/dictionary.c b/src/dictionary.c index a3185b99..43b4f779 100644 --- a/src/dictionary.c +++ b/src/dictionary.c @@ -27,6 +27,7 @@ #include "error.h" #include "hash.h" #include "misc.h" +#include "settings.h" #include "str.h" #include "value-labels.h" #include "var.h" @@ -37,7 +38,6 @@ struct dictionary struct variable **var; /* Variables. */ size_t var_cnt, var_cap; /* Number of variables, capacity. */ struct hsh_table *name_tab; /* Variable index by name. */ - struct hsh_table *long_name_tab; /* Variable indexed by long name */ int next_value_idx; /* Index of next `union value' to allocate. */ struct variable **split; /* SPLIT FILE vars. */ size_t split_cnt; /* SPLIT FILE count. */ @@ -50,46 +50,6 @@ struct dictionary size_t vector_cnt; /* Number of vectors. */ }; - - - - -int -compare_long_names(const void *a_, const void *b_, void *aux UNUSED) -{ - const struct name_table_entry *a = a_; - const struct name_table_entry *b = b_; - - return strcasecmp(a->longname, b->longname); -} - - -/* Long names use case insensitive comparison */ -unsigned int -hash_long_name (const void *e_, void *aux UNUSED) -{ - const struct name_table_entry *e = e_; - unsigned int hash; - int i; - - char *s = strdup(e->longname); - - for ( i = 0 ; i < strlen(s) ; ++i ) - s[i] = toupper(s[i]); - - hash = hsh_hash_string (s); - - free (s); - - return hash; -} - - - - -static char *make_short_name(struct dictionary *dict, const char *longname) ; - - /* Creates and returns a new dictionary. 
*/ struct dictionary * dict_create (void) @@ -99,8 +59,6 @@ dict_create (void) d->var = NULL; d->var_cnt = d->var_cap = 0; d->name_tab = hsh_create (8, compare_var_names, hash_var_name, NULL, NULL); - d->long_name_tab = hsh_create (8, compare_long_names, hash_long_name, - (hsh_free_func *) free_nte, NULL); d->next_value_idx = 0; d->split = NULL; d->split_cnt = 0; @@ -127,8 +85,12 @@ dict_clone (const struct dictionary *s) d = dict_create (); - for (i = 0; i < s->var_cnt; i++) - dict_clone_var (d, s->var[i], s->var[i]->name, s->var[i]->longname); + for (i = 0; i < s->var_cnt; i++) + { + struct variable *sv = s->var[i]; + struct variable *dv = dict_clone_var_assert (d, sv, sv->name); + var_set_short_name (dv, sv->short_name); + } d->next_value_idx = s->next_value_idx; @@ -150,9 +112,21 @@ dict_clone (const struct dictionary *s) dict_set_label (d, dict_get_label (s)); dict_set_documents (d, dict_get_documents (s)); + d->vector_cnt = s->vector_cnt; + d->vector = xmalloc (d->vector_cnt * sizeof *d->vector); for (i = 0; i < s->vector_cnt; i++) - dict_create_vector (d, s->vector[i]->name, - s->vector[i]->var, s->vector[i]->cnt); + { + struct vector *sv = s->vector[i]; + struct vector *dv = d->vector[i] = xmalloc (sizeof *dv); + int j; + + dv->idx = i; + strcpy (dv->name, sv->name); + dv->cnt = sv->cnt; + dv->var = xmalloc (dv->cnt * sizeof *dv->var); + for (j = 0; j < dv->cnt; j++) + dv->var[j] = d->var[sv->var[j]->index]; + } return d; } @@ -180,8 +154,6 @@ dict_clear (struct dictionary *d) d->var = NULL; d->var_cnt = d->var_cap = 0; hsh_clear (d->name_tab); - if ( d->long_name_tab) - hsh_clear (d->long_name_tab); d->next_value_idx = 0; free (d->split); d->split = NULL; @@ -196,85 +168,6 @@ dict_clear (struct dictionary *d) dict_clear_vectors (d); } -/* Allocate the pointer TEXT and fill it with text representing the - long variable name buffer. SIZE will contain the size of TEXT. - TEXT must be freed by the caller when no longer required. 
-*/ -void -dict_get_varname_block(const struct dictionary *dict, char **text, int *size) -{ - char *buf = 0; - int bufsize = 0; - struct hsh_iterator hi; - struct name_table_entry *nte; - short first = 1; - - for ( nte = hsh_first(dict->long_name_tab, &hi); - nte; - nte = hsh_next(dict->long_name_tab, &hi)) - { - bufsize += strlen(nte->name) + strlen(nte->longname) + 2; - buf = xrealloc(buf, bufsize + 1); - if ( first ) - strcpy(buf, ""); - first = 0; - - strcat(buf, nte->name); - strcat(buf, "="); - strcat(buf, nte->longname); - strcat(buf, "\t"); - } - - if ( bufsize > 0 ) - { - /* Loose the final delimiting TAB */ - buf[bufsize]='\0'; - bufsize--; - } - - *text = buf; - *size = bufsize; -} - -/* Add a new entry into the dictionary's long name table, and update the - corresponding variable with the relevant long name. -*/ -void -dict_add_longvar_entry(struct dictionary *d, - const char *name, - const char *longname) -{ - struct variable *v; - assert ( name ) ; - assert ( longname ); - struct name_table_entry *nte = xmalloc (sizeof (struct name_table_entry)); - nte->longname = strdup(longname); - nte->name = strdup(name); - - /* Look up the name in name_tab */ - v = hsh_find ( d->name_tab, name); - if ( !v ) - { - msg (FE, _("The entry \"%s\" in the variable name map, has no corresponding variable"), name); - return ; - } - assert ( 0 == strcmp(v->name, name) ); - v->longname = nte->longname; - - hsh_insert(d->long_name_tab, nte); -} - -/* Destroy and free up an nte */ -void -free_nte(struct name_table_entry *nte) -{ - assert(nte); - free(nte->longname); - free(nte->name); - free(nte); -} - - /* Destroys the aux data for every variable in D, by calling var_clear_aux() for each variable. 
*/ void @@ -296,7 +189,6 @@ dict_destroy (struct dictionary *d) { dict_clear (d); hsh_destroy (d->name_tab); - hsh_destroy (d->long_name_tab); free (d); } } @@ -354,44 +246,11 @@ dict_get_vars (const struct dictionary *d, struct variable ***vars, } -static struct variable * dict_create_var_x (struct dictionary *d, - const char *name, int width, - short name_is_short) ; - -/* Creates and returns a new variable in D with the given LONGNAME - and WIDTH. Returns a null pointer if the given LONGNAME would - duplicate that of an existing variable in the dictionary. -*/ -struct variable * -dict_create_var (struct dictionary *d, const char *longname, int width) -{ - return dict_create_var_x(d, longname, width, 0); -} - - -/* Creates and returns a new variable in D with the given SHORTNAME and - WIDTH. The long name table is not updated */ -struct variable * -dict_create_var_from_short (struct dictionary *d, const char *shortname, - int width) -{ - return dict_create_var_x(d, shortname, width, 1); -} - - - /* Creates and returns a new variable in D with the given NAME - and WIDTH. - If NAME_IS_SHORT, assume NAME is the short name. Otherwise assumes - NAME is the long name, and creates the corresponding entry in the - Dictionary's lookup name table . - Returns a null pointer if the given NAME would - duplicate that of an existing variable in the dictionary. - -*/ -static struct variable * -dict_create_var_x (struct dictionary *d, const char *name, int width, - short name_is_short) + and WIDTH. Returns a null pointer if the given NAME would + duplicate that of an existing variable in the dictionary. 
*/ +struct variable * +dict_create_var (struct dictionary *d, const char *name, int width) { struct variable *v; @@ -399,41 +258,24 @@ dict_create_var_x (struct dictionary *d, const char *name, int width, assert (name != NULL); assert (strlen (name) >= 1); + assert (strlen (name) <= LONG_NAME_LEN); assert (width >= 0 && width < 256); - if ( name_is_short ) - assert(strlen (name) <= SHORT_NAME_LEN); - else - assert(strlen (name) <= LONG_NAME_LEN); - /* Make sure there's not already a variable by that name. */ if (dict_lookup_var (d, name) != NULL) return NULL; /* Allocate and initialize variable. */ v = xmalloc (sizeof *v); - - if ( name_is_short ) - { - strncpy (v->name, name, sizeof v->name); - v->name[SHORT_NAME_LEN] = '\0'; - } - else - { - const char *sn = make_short_name(d, name); - strncpy(v->name, sn, SHORT_NAME_LEN + 1); - free(sn); - } - - - v->index = d->var_cnt; + st_trim_copy (v->name, name, sizeof v->name); v->type = width == 0 ? NUMERIC : ALPHA; v->width = width; v->fv = d->next_value_idx; v->nv = width == 0 ? 1 : DIV_RND_UP (width, 8); v->init = 1; v->reinit = dict_class_from_id (v->name) != DC_SCRATCH; + v->index = d->var_cnt; v->miss_type = MISSING_NONE; if (v->type == NUMERIC) { @@ -458,6 +300,7 @@ dict_create_var_x (struct dictionary *d, const char *name, int width, v->write = v->print; v->val_labs = val_labs_create (v->width); v->label = NULL; + var_clear_short_name (v); v->aux = NULL; v->aux_dtor = NULL; @@ -471,9 +314,6 @@ dict_create_var_x (struct dictionary *d, const char *name, int width, d->var_cnt++; hsh_force_insert (d->name_tab, v); - if ( ! name_is_short) - dict_add_longvar_entry(d, v->name, name); - d->next_value_idx += v->nv; return v; @@ -483,49 +323,38 @@ dict_create_var_x (struct dictionary *d, const char *name, int width, and WIDTH. Assert-fails if the given NAME would duplicate that of an existing variable in the dictionary. 
*/ struct variable * -dict_create_var_assert (struct dictionary *d, const char *longname, int width) +dict_create_var_assert (struct dictionary *d, const char *name, int width) { - struct variable *v = dict_create_var (d, longname, width); + struct variable *v = dict_create_var (d, name, width); assert (v != NULL); return v; } -/* Creates a new variable in D with longname LONGNAME, as a copy of - existing variable OV, which need not be in D or in any - dictionary. - If SHORTNAME is non null, it will be used as the short name - otherwise a new short name will be generated. -*/ +/* Creates and returns a new variable in D with name NAME, as a + copy of existing variable OV, which need not be in D or in any + dictionary. Returns a null pointer if the given NAME would + duplicate that of an existing variable in the dictionary. */ struct variable * dict_clone_var (struct dictionary *d, const struct variable *ov, - const char *name, const char *longname) + const char *name) { struct variable *nv; assert (d != NULL); assert (ov != NULL); - assert (strlen (longname) <= LONG_NAME_LEN); - - struct name_table_entry *nte = xmalloc (sizeof (struct name_table_entry)); - - nte->longname = strdup(longname); - if ( name ) - { - assert (strlen (name) >= 1); - assert (strlen (name) <= SHORT_NAME_LEN); - nte->name = strdup(name); - } - else - nte->name = make_short_name(d, longname); + assert (name != NULL); + assert (strlen (name) >= 1); + assert (strlen (name) <= LONG_NAME_LEN); - nv = dict_create_var_from_short (d, nte->name, ov->width); + nv = dict_create_var (d, name, ov->width); if (nv == NULL) return NULL; - hsh_insert(d->long_name_tab, nte); - nv->longname = nte->longname; - + /* Copy most members not copied via dict_create_var(). + short_name[] is intentionally not copied, because there is + no reason to give a new variable with potentially a new name + the same short name. 
*/ nv->init = 1; nv->reinit = ov->reinit; nv->miss_type = ov->miss_type; @@ -536,37 +365,24 @@ dict_clone_var (struct dictionary *d, const struct variable *ov, nv->val_labs = val_labs_copy (ov->val_labs); if (ov->label != NULL) nv->label = xstrdup (ov->label); - - nv->alignment = ov->alignment; nv->measure = ov->measure; nv->display_width = ov->display_width; + nv->alignment = ov->alignment; return nv; } -/* Changes the name of V in D to name NEW_NAME. Assert-fails if - a variable named NEW_NAME is already in D, except that - NEW_NAME may be the same as V's existing name. */ -void -dict_rename_var (struct dictionary *d, struct variable *v, - const char *new_name) +/* Creates and returns a new variable in D with name NAME, as a + copy of existing variable OV, which need not be in D or in any + dictionary. Assert-fails if the given NAME would duplicate + that of an existing variable in the dictionary. */ +struct variable * +dict_clone_var_assert (struct dictionary *d, const struct variable *ov, + const char *name) { - assert (d != NULL); + struct variable *v = dict_clone_var (d, ov, name); assert (v != NULL); - assert (new_name != NULL); - assert (strlen (new_name) >= 1 && strlen (new_name) <= SHORT_NAME_LEN); - assert (dict_contains_var (d, v)); - - if (!strcmp (v->name, new_name)) - return; - - assert (dict_lookup_var (d, new_name) == NULL); - - hsh_force_delete (d->name_tab, v); - strncpy (v->name, new_name, sizeof v->name); - v->name[SHORT_NAME_LEN] = '\0'; - hsh_force_insert (d->name_tab, v); - dict_add_longvar_entry (d, new_name, new_name); + return v; } /* Returns the variable named NAME in D, or a null pointer if no @@ -575,35 +391,14 @@ struct variable * dict_lookup_var (const struct dictionary *d, const char *name) { struct variable v; - struct variable *vr; - char *short_name; - struct name_table_entry key; - struct name_table_entry *nte; - assert (d != NULL); assert (name != NULL); - assert (strlen (name) >= 1 && strlen (name) <= LONG_NAME_LEN); - 
key.longname = name; - nte = hsh_find (d->long_name_tab, &key); - - if ( ! nte ) - { - return 0; - } - - short_name = nte->name ; - - strncpy (v.name, short_name, sizeof v.name); - v.name[SHORT_NAME_LEN] = '\0'; - - vr = hsh_find (d->name_tab, &v); - - return vr; + st_trim_copy (v.name, name, sizeof v.name); + return hsh_find (d->name_tab, &v); } - /* Returns the variable named NAME in D. Assert-fails if no variable has that name. */ struct variable * @@ -701,6 +496,29 @@ dict_delete_vars (struct dictionary *d, dict_delete_var (d, *vars++); } +/* Moves V to 0-based position IDX in D. Other variables in D, + if any, retain their relative positions. Runs in time linear + in the distance moved. */ +void +dict_reorder_var (struct dictionary *d, struct variable *v, + size_t new_index) +{ + size_t min_idx, max_idx; + size_t i; + + assert (d != NULL); + assert (v != NULL); + assert (dict_contains_var (d, v)); + assert (new_index < d->var_cnt); + + move_element (d->var, d->var_cnt, sizeof *d->var, v->index, new_index); + + min_idx = min (v->index, new_index); + max_idx = max (v->index, new_index); + for (i = min_idx; i <= max_idx; i++) + d->var[i]->index = i; +} + /* Reorders the variables in D, placing the COUNT variables listed in ORDER in that order at the beginning of D. The other variables in D, if any, retain their relative @@ -736,6 +554,29 @@ dict_reorder_vars (struct dictionary *d, d->var = new_var; } +/* Changes the name of V in D to name NEW_NAME. Assert-fails if + a variable named NEW_NAME is already in D, except that + NEW_NAME may be the same as V's existing name. 
*/ +void +dict_rename_var (struct dictionary *d, struct variable *v, + const char *new_name) +{ + assert (d != NULL); + assert (v != NULL); + assert (new_name != NULL); + assert (var_is_valid_name (new_name, false)); + assert (dict_contains_var (d, v)); + assert (!compare_var_names (v->name, new_name, NULL) + || dict_lookup_var (d, new_name) == NULL); + + hsh_force_delete (d->name_tab, v); + st_trim_copy (v->name, new_name, sizeof v->name); + hsh_force_insert (d->name_tab, v); + + if (get_algorithm () == ENHANCED) + var_clear_short_name (v); +} + /* Renames COUNT variables specified in VARS to the names given in NEW_NAMES within dictionary D. If the renaming would result in a duplicate variable name, returns zero and stores a @@ -751,44 +592,36 @@ dict_rename_vars (struct dictionary *d, size_t i; int success = 1; - assert (d != NULL); assert (count == 0 || vars != NULL); assert (count == 0 || new_names != NULL); - + /* Remove the variables to be renamed from the name hash, + save their names, and rename them. */ old_names = xmalloc (count * sizeof *old_names); for (i = 0; i < count; i++) { assert (d->var[vars[i]->index] == vars[i]); + assert (var_is_valid_name (new_names[i], false)); hsh_force_delete (d->name_tab, vars[i]); old_names[i] = xstrdup (vars[i]->name); + strcpy (vars[i]->name, new_names[i]); } - + + /* Add the renamed variables back into the name hash, + checking for conflicts. 
*/ for (i = 0; i < count; i++) { - char *sn; - struct name_table_entry key; - struct name_table_entry *nte; assert (new_names[i] != NULL); assert (*new_names[i] != '\0'); - assert (strlen (new_names[i]) <= LONG_NAME_LEN ); - - sn = make_short_name(d, new_names[i]); - strncpy(vars[i]->name, sn, SHORT_NAME_LEN + 1); - free(sn); - - + assert (strlen (new_names[i]) >= 1); + assert (strlen (new_names[i]) <= LONG_NAME_LEN); - key.longname = vars[i]->longname; - nte = hsh_find (d->long_name_tab, &key); - - free( nte->longname ) ; - nte->longname = strdup ( new_names[i]); - vars[i]->longname = nte->longname; - - if (hsh_insert (d->name_tab, vars[i]) != NULL ) + if (hsh_insert (d->name_tab, vars[i]) != NULL) { + /* There is a name conflict. + Back out all the name changes that have already + taken place, and indicate failure. */ size_t fail_idx = i; if (err_name != NULL) *err_name = new_names[i]; @@ -800,21 +633,20 @@ dict_rename_vars (struct dictionary *d, { strcpy (vars[i]->name, old_names[i]); hsh_force_insert (d->name_tab, vars[i]); - - key.longname = vars[i]->longname; - nte = hsh_find (d->long_name_tab, &key); - - free( nte->longname ) ; - nte->longname = strdup ( old_names[i]); - vars[i]->longname = nte->longname; - } success = 0; - break; + goto done; } } + /* Clear short names. */ + if (get_algorithm () == ENHANCED) + for (i = 0; i < count; i++) + var_clear_short_name (vars[i]); + + done: + /* Free the old names we kept around. 
*/ for (i = 0; i < count; i++) free (old_names[i]); free (old_names); @@ -1125,10 +957,11 @@ dict_create_vector (struct dictionary *d, struct variable **var, size_t cnt) { struct vector *vector; + size_t i; assert (d != NULL); assert (name != NULL); - assert (strlen (name) > 0 && strlen (name) <= SHORT_NAME_LEN ); + assert (var_is_valid_name (name, false)); assert (var != NULL); assert (cnt > 0); @@ -1138,10 +971,13 @@ dict_create_vector (struct dictionary *d, d->vector = xrealloc (d->vector, (d->vector_cnt + 1) * sizeof *d->vector); vector = d->vector[d->vector_cnt] = xmalloc (sizeof *vector); vector->idx = d->vector_cnt++; - strncpy (vector->name, name, SHORT_NAME_LEN); - vector->name[SHORT_NAME_LEN] = '\0'; + st_trim_copy (vector->name, name, sizeof vector->name); vector->var = xmalloc (cnt * sizeof *var); - memcpy (vector->var, var, cnt * sizeof *var); + for (i = 0; i < cnt; i++) + { + assert (dict_contains_var (d, var[i])); + vector->var[i] = var[i]; + } vector->cnt = cnt; return 1; @@ -1178,7 +1014,7 @@ dict_lookup_vector (const struct dictionary *d, const char *name) assert (name != NULL); for (i = 0; i < d->vector_cnt; i++) - if (!strcmp (d->vector[i]->name, name)) + if (!strcasecmp (d->vector[i]->name, name)) return d->vector[i]; return NULL; } @@ -1201,82 +1037,74 @@ dict_clear_vectors (struct dictionary *d) d->vector_cnt = 0; } - -static const char * quasi_base27(int i); - - -/* Convert I to quasi base 27 - The result is a staticly allocated string. -*/ -static const char * -quasi_base27(int i) +/* Compares two strings. */ +static int +compare_strings (const void *a, const void *b, void *aux UNUSED) { - static char result[SHORT_NAME_LEN + 1]; - static char reverse[SHORT_NAME_LEN + 1]; - - /* FIXME: check the result cant overflow these arrays */ - - char *s = result ; - const int radix = 27; - int units; - - /* and here's the quasi-ness of this routine */ - i = i + ( i / radix ); - - strcpy(result,""); - do { - units = i % radix; - *s++ = (units > 0 ) ? 
units + 'A' - 1 : 'A'; - i = i / radix; - } while (i > 0 ) ; - *s = '\0'; - - /* Reverse the result */ - i = strlen(result); - s = reverse; - while(i >= 0) - *s++ = result[--i]; - *s = '\0'; - - return reverse; + return strcmp (a, b); } - -/* Generate a short name, given a long name. - The return value of this function must be freed by the caller. -*/ -static char * -make_short_name(struct dictionary *dict, const char *longname) +/* Hashes a string. */ +static unsigned +hash_string (const void *s, void *aux UNUSED) { - int i = 0; - char *p; - - - char *d = xmalloc ( SHORT_NAME_LEN + 1); + return hsh_hash_string (s); +} - /* Truncate the name */ - strncpy(d, longname, SHORT_NAME_LEN); - d[SHORT_NAME_LEN] = '\0'; +/* Assigns a valid, unique short_name[] to each variable in D. + Each variable whose actual name is short has highest priority + for that short name. Otherwise, variables with an existing + short_name[] have the next highest priority for a given short + name; if it is already taken, then the variable is treated as + if short_name[] had been empty. Otherwise, long names are + truncated to form short names. If that causes conflicts, + variables are renamed as PREFIX_A, PREFIX_B, and so on. */ +void +dict_assign_short_names (struct dictionary *d) +{ + struct hsh_table *short_names; + size_t i; - /* Convert to upper case */ - for ( p = d; *p ; ++p ) - *p = toupper(*p); + /* Give variables whose names are short the corresponding short + names, and clear short_names[] that conflict with a variable + name. 
*/ + for (i = 0; i < d->var_cnt; i++) + { + struct variable *v = d->var[i]; + if (strlen (v->name) <= SHORT_NAME_LEN) + var_set_short_name (v, v->name); + else if (dict_lookup_var (d, v->short_name) != NULL) + var_clear_short_name (v); + } - /* If a variable with that name already exists, then munge it - until there's no conflict */ - while (0 != hsh_find (dict->name_tab, d)) - { - const char *suffix = quasi_base27(i++); + /* Each variable with an assigned short_name[] now gets it + unless there is a conflict. */ + short_names = hsh_create (d->var_cnt, compare_strings, hash_string, + NULL, NULL); + for (i = 0; i < d->var_cnt; i++) + { + struct variable *v = d->var[i]; + if (v->short_name[0] && hsh_insert (short_names, v->short_name) != NULL) + var_clear_short_name (v); + } + + /* Now assign short names to remaining variables. */ + for (i = 0; i < d->var_cnt; i++) + { + struct variable *v = d->var[i]; + if (v->short_name[0] == '\0') + { + int sfx; - d[SHORT_NAME_LEN - strlen(suffix) - 1 ] = '_'; - d[SHORT_NAME_LEN - strlen(suffix) ] = '\0'; - strcat(d, suffix); - } + /* Form initial short_name. */ + var_set_short_name (v, v->name); + /* Try _A, _B, ... _AA, _AB, etc., if needed. */ + for (sfx = 0; hsh_insert (short_names, v->short_name) != NULL; sfx++) + var_set_short_name_suffix (v, v->name, sfx); + } + } - return d; + /* Get rid of hash table. 
*/ + hsh_destroy (short_names); } - - - - diff --git a/src/dictionary.h b/src/dictionary.h index 58092b75..06e190a3 100644 --- a/src/dictionary.h +++ b/src/dictionary.h @@ -40,15 +40,12 @@ void dict_get_vars (const struct dictionary *, struct variable *dict_create_var (struct dictionary *, const char *, int width); -struct variable *dict_create_var_from_short (struct dictionary *d, - const char *shortname, - int width); - struct variable *dict_create_var_assert (struct dictionary *, const char *, int width); struct variable *dict_clone_var (struct dictionary *, const struct variable *, - const char *shortname, const char *longname); -void dict_rename_var (struct dictionary *, struct variable *, const char *); + const char *); +struct variable *dict_clone_var_assert (struct dictionary *, + const struct variable *, const char *); struct variable *dict_lookup_var (const struct dictionary *, const char *); struct variable *dict_lookup_var_assert (const struct dictionary *, @@ -57,8 +54,11 @@ int dict_contains_var (const struct dictionary *, const struct variable *); void dict_delete_var (struct dictionary *, struct variable *); void dict_delete_vars (struct dictionary *, struct variable *const *, size_t count); +void dict_reorder_var (struct dictionary *d, struct variable *v, + size_t new_index); void dict_reorder_vars (struct dictionary *, struct variable *const *, size_t count); +void dict_rename_var (struct dictionary *, struct variable *, const char *); int dict_rename_vars (struct dictionary *, struct variable **, char **new_names, size_t count, char **err_name); @@ -105,12 +105,6 @@ const struct vector *dict_lookup_vector (const struct dictionary *, const char *name); void dict_clear_vectors (struct dictionary *); -void dict_get_varname_block(const struct dictionary *dict, char **buf, int *size); - -void dict_add_longvar_entry(struct dictionary *d, const char *name, - const char *longname); - - - +void dict_assign_short_names (struct dictionary *); #endif /* 
dictionary.h */ diff --git a/src/expressions/parse.c b/src/expressions/parse.c index 7af90feb..15efe00b 100644 --- a/src/expressions/parse.c +++ b/src/expressions/parse.c @@ -881,14 +881,14 @@ word_matches (const char **test, const char **name) size_t name_len = strcspn (*name, "."); if (test_len == name_len) { - if (memcmp (*test, *name, test_len)) + if (mm_case_compare (*test, *name, test_len)) return false; } else if (test_len < 3 || test_len > name_len) return false; else { - if (memcmp (*test, *name, test_len)) + if (mm_case_compare (*test, *name, test_len)) return false; } @@ -917,6 +917,12 @@ compare_names (const char *test, const char *name) } } +static int +compare_strings (const char *test, const char *name) +{ + return strcasecmp (test, name); +} + static bool lookup_function_helper (const char *name, int (*compare) (const char *test, const char *name), @@ -947,7 +953,7 @@ lookup_function (const char *name, const struct operation **last) { *first = *last = NULL; - return (lookup_function_helper (name, strcmp, first, last) + return (lookup_function_helper (name, compare_strings, first, last) || lookup_function_helper (name, compare_names, first, last)); } @@ -1273,7 +1279,7 @@ expr_allocate_nullary (struct expression *e, operation_type op) union any_node * expr_allocate_unary (struct expression *e, operation_type op, -union any_node *arg0) + union any_node *arg0) { return expr_allocate_composite (e, op, &arg0, 1); } diff --git a/src/file-handle.q b/src/file-handle.q index 9596c0d2..688fbb89 100644 --- a/src/file-handle.q +++ b/src/file-handle.q @@ -73,7 +73,7 @@ get_handle_with_name (const char *handle_name) struct file_handle *iter; for (iter = file_handles; iter != NULL; iter = iter->next) - if (!strcmp (handle_name, iter->name)) + if (!strcasecmp (handle_name, iter->name)) return iter; return NULL; } @@ -109,22 +109,21 @@ get_handle_for_filename (const char *filename) int cmd_file_handle (void) { - char handle_name[9]; + char handle_name[LONG_NAME_LEN 
+ 1]; struct cmd_file_handle cmd; struct file_handle *handle; if (!lex_force_id ()) return CMD_FAILURE; - strcpy (handle_name, tokid); + st_trim_copy (handle_name, tokid, sizeof handle_name); handle = get_handle_with_name (handle_name); if (handle != NULL) { - msg (SE, _("File handle %s already refers to " - "file %s. File handle cannot be redefined within a " - "session."), - tokid, handle->filename); + msg (SE, _("File handle %s already refers to file %s. " + "File handles cannot be redefined within a session."), + handle_name, handle->filename); return CMD_FAILURE; } diff --git a/src/file-type.c b/src/file-type.c index 42ca6843..87bd874f 100644 --- a/src/file-type.c +++ b/src/file-type.c @@ -44,7 +44,7 @@ enum /* Limited variable column specifications. */ struct col_spec { - char name[SHORT_NAME_LEN + 1]; /* Variable name. */ + char name[LONG_NAME_LEN + 1]; /* Variable name. */ int fc, nc; /* First column (1-based), # of columns. */ int fmt; /* Format type. */ struct variable *v; /* Variable. */ @@ -264,7 +264,7 @@ cmd_file_type (void) goto error; } - if (!strcmp (fty->case_sbc.name, fty->record.name)) + if (!strcasecmp (fty->case_sbc.name, fty->record.name)) { msg (SE, _("CASE and RECORD must specify different variable " "names.")); diff --git a/src/flip.c b/src/flip.c index 128c1a97..c882f29a 100644 --- a/src/flip.c +++ b/src/flip.c @@ -181,25 +181,27 @@ destroy_flip_pgm (struct flip_pgm *flip) static int make_new_var (char name[]) { + char *cp; + + /* Trim trailing spaces. */ + cp = strchr (name, '\0'); + while (cp > name && isspace ((unsigned char) cp[-1])) + *--cp = '\0'; + /* Fix invalid characters. */ - { - char *cp; - - for (cp = name; *cp && !isspace (*cp); cp++) + for (cp = name; *cp && cp < name + SHORT_NAME_LEN; cp++) + if (cp == name) { - *cp = toupper ((unsigned char) *cp); - if (!isalpha (*cp) && *cp != '@' && *cp != '#' - && (cp == name || (*cp != '.' 
&& *cp != '$' && *cp != '_' - && !isdigit (*cp)))) - { - if (cp == name) - *cp = 'V'; /* _ not valid in first position. */ - else - *cp = '_'; - } + if (!CHAR_IS_ID1 (*cp) || *cp == '$') + *cp = 'V'; } - *cp = 0; - } + else + { + if (!CHAR_IS_IDN (*cp)) + *cp = '_'; + } + *cp = '\0'; + st_uppercase (name); if (dict_create_var (default_dict, name, 0)) return 1; @@ -323,8 +325,7 @@ flip_sink_write (struct case_sink *sink, const struct ccase *c) { char name[INT_DIGITS + 2]; sprintf (name, "V%d", (int) f); - strncpy (v->name, name, SHORT_NAME_LEN); - name[SHORT_NAME_LEN] = 0; + st_trim_copy (v->name, name, sizeof v->name); } } else diff --git a/src/format.c b/src/format.c index a1cea8d6..b3070e21 100644 --- a/src/format.c +++ b/src/format.c @@ -59,7 +59,7 @@ parse_format_specifier_name (const char **cp, enum fmt_parse_flags flags) /* Find format. */ for (idx = 0; idx < FMT_NUMBER_OF_FORMATS; idx++) if (strlen (formats[idx].name) == ep - sp - && !memcmp (formats[idx].name, sp, ep - sp)) + && !mm_case_compare (formats[idx].name, sp, ep - sp)) break; /* Check format. */ diff --git a/src/get.c b/src/get.c index c4dcc820..851059ca 100644 --- a/src/get.c +++ b/src/get.c @@ -212,17 +212,13 @@ cmd_save_internal (void) if (lex_match_id ("VERSION")) { lex_match ('='); - if ( lex_force_num() ) + if (lex_force_int ()) { - lex_get(); - version = tokval; + version = lex_integer (); + lex_get (); - if ( 0 == strncasecmp (tokid,"x", 1) ) - { - lex_get(); - no_name_table = 1; - } - + if (lex_match_id ("X")) + no_name_table = 1; } } else if (lex_match_id ("OUTFILE")) @@ -450,8 +446,6 @@ rename_variables (struct dictionary *dict) if (!lex_force_match ('=') || !lex_force_id ()) return 0; - if (!strncmp (tokid, v->name, SHORT_NAME_LEN)) - return 1; if (dict_lookup_var (dict, tokid) != NULL) { msg (SE, _("Cannot rename %s as %s because there already exists " @@ -686,7 +680,7 @@ struct mtf_proc size_t by_cnt; /* Number of variables on BY subcommand. */ /* Names of FIRST, LAST variables. 
*/ - char first[SHORT_NAME_LEN + 1], last[SHORT_NAME_LEN + 1]; + char first[LONG_NAME_LEN + 1], last[LONG_NAME_LEN + 1]; struct dictionary *dict; /* Dictionary of output file. */ struct case_sink *sink; /* Sink to receive output. */ @@ -1437,10 +1431,7 @@ mtf_merge_dictionary (struct dictionary *const m, struct mtf_file *f) mv->label = xstrdup (dv->label); } else - { - mv = dict_clone_var (m, dv, dv->name, dv->longname); - assert (mv != NULL); - } + mv = dict_clone_var_assert (m, dv, dv->name); } return 1; diff --git a/src/hash.c b/src/hash.c index 870958e0..050b3857 100644 --- a/src/hash.c +++ b/src/hash.c @@ -20,6 +20,7 @@ #include #include "hash.h" #include "error.h" +#include #include #include #include "algorithm.h" @@ -27,19 +28,6 @@ #include "misc.h" #include "str.h" -/* Hash table. */ -struct hsh_table - { - size_t used; /* Number of filled entries. */ - size_t size; /* Number of entries (a power of 2). */ - void **entries; /* Hash table proper. */ - - void *aux; /* Auxiliary data for comparison functions. */ - hsh_compare_func *compare; - hsh_hash_func *hash; - hsh_free_func *free; - }; - /* Note for constructing hash functions: You can store the hash values in the records, then compare hash @@ -112,6 +100,22 @@ hsh_hash_string (const char *s_) return hash; } +/* Fowler-Noll-Vo 32-bit hash, for case-insensitive strings. */ +unsigned +hsh_hash_case_string (const char *s_) +{ + const unsigned char *s = s_; + unsigned hash; + + assert (s != NULL); + + hash = FNV_32_BASIS; + while (*s != '\0') + hash = (hash * FNV_32_PRIME) ^ toupper (*s++); + + return hash; +} + /* Hash for ints. */ unsigned hsh_hash_int (int i) @@ -131,6 +135,19 @@ hsh_hash_double (double d) /* Hash tables. */ +/* Hash table. */ +struct hsh_table + { + size_t used; /* Number of filled entries. */ + size_t size; /* Number of entries (a power of 2). */ + void **entries; /* Hash table proper. */ + + void *aux; /* Auxiliary data for comparison functions. 
*/ + hsh_compare_func *compare; + hsh_hash_func *hash; + hsh_free_func *free; + }; + /* Creates a hash table with at least M entries. COMPARE is a function that compares two entries and returns 0 if they are identical, nonzero otherwise; HASH returns a nonnegative hash value @@ -198,6 +215,24 @@ hsh_destroy (struct hsh_table *h) } } +/* Locates an entry matching TARGET. Returns the index of the + matching entry, or of an empty slot if there is none. */ +static inline unsigned +locate_matching_entry (struct hsh_table *h, const void *target) +{ + unsigned i = h->hash (target, h->aux); + + for (;;) + { + void *entry; + i &= h->size - 1; + entry = h->entries[i]; + if (entry == NULL || !h->compare (entry, target, h->aux)) + return i; + i--; + } +} + /* Changes the capacity of H to NEW_SIZE. */ static void hsh_rehash (struct hsh_table *h, size_t new_size) @@ -215,15 +250,12 @@ hsh_rehash (struct hsh_table *h, size_t new_size) h->entries = xmalloc (sizeof *h->entries * h->size); for (i = 0; i < h->size; i++) h->entries[i] = NULL; - for (table_p = begin; table_p < end; table_p++) - if (*table_p != NULL) - { - void **entry = &h->entries[h->hash (*table_p, h->aux) & (h->size - 1)]; - while (*entry != NULL) - if (++entry >= h->entries + h->size) - entry = h->entries; - *entry = *table_p; - } + for (table_p = begin; table_p < end; table_p++) + { + void *entry = *table_p; + if (entry != NULL) + h->entries[locate_matching_entry (h, entry)] = entry; + } free (begin); } @@ -327,25 +359,17 @@ hsh_sort_copy (struct hsh_table *h) inline void ** hsh_probe (struct hsh_table *h, const void *target) { - void **entry; - + unsigned i; + assert (h != NULL); assert (target != NULL); - /* Order of these statements is important! 
*/ if (h->used > h->size / 2) hsh_rehash (h, h->size * 2); - entry = &h->entries[h->hash (target, h->aux) & (h->size - 1)]; - - while (*entry) - { - if (!h->compare (*entry, target, h->aux)) - return entry; - if (++entry >= h->entries + h->size) - entry = h->entries; - } - h->used++; - return entry; + i = locate_matching_entry (h, target); + if (h->entries[i] == NULL) + h->used++; + return &h->entries[i]; } /* Searches hash table H for TARGET. If not found, inserts @@ -381,30 +405,12 @@ hsh_replace (struct hsh_table *h, void *target) return old; } -/* Locates an entry matching TARGET. Returns a pointer to the - entry, or a null pointer on failure. */ -static inline void ** -locate_matching_entry (struct hsh_table *h, const void *target) -{ - void **entry = &h->entries[h->hash (target, h->aux) & (h->size - 1)]; - - while (*entry) - { - if (!h->compare (*entry, target, h->aux)) - return entry; - if (++entry >= h->entries + h->size) - entry = h->entries; - } - return NULL; -} - /* Returns the entry in hash table H that matches TARGET, or NULL if there is none. */ void * hsh_find (struct hsh_table *h, const void *target) { - void **entry = locate_matching_entry (h, target); - return entry != NULL ? *entry : NULL; + return h->entries[locate_matching_entry (h, target)]; } /* Deletes the entry in hash table H that matches TARGET. @@ -413,39 +419,34 @@ hsh_find (struct hsh_table *h, const void *target) Uses Knuth's Algorithm 6.4R (Deletion with linear probing). Because our load factor is at most 1/2, the average number of moves that this algorithm makes should be at most 2 - ln 2 ~= - 1.65. - - Not well tested. */ + 1.65. 
*/ int hsh_delete (struct hsh_table *h, const void *target) { - void **entry = locate_matching_entry (h, target); - if (entry != NULL) + unsigned i = locate_matching_entry (h, target); + if (h->entries[i] != NULL) { - ptrdiff_t i; - h->used--; if (h->free != NULL) - h->free (*entry, h->aux); - *entry = 0; + h->free (h->entries[i], h->aux); - i = entry - h->entries; for (;;) { unsigned r; - ptrdiff_t j = i; + ptrdiff_t j; + h->entries[i] = NULL; + j = i; do { - if (--i < 0) - i = h->size - 1; + i = (i - 1) & (h->size - 1); if (h->entries[i] == NULL) return 1; r = h->hash (h->entries[i], h->aux) & (h->size - 1); } while ((i <= r && r < j) || (r < j && j < i) || (j < i && i <= r)); - h->entries[i] = h->entries[j]; + h->entries[j] = h->entries[i]; } } else diff --git a/src/hash.h b/src/hash.h index f3b0e30c..ffacad8e 100644 --- a/src/hash.h +++ b/src/hash.h @@ -35,6 +35,7 @@ struct hsh_iterator /* Hash functions. */ unsigned hsh_hash_bytes (const void *, size_t); unsigned hsh_hash_string (const char *); +unsigned hsh_hash_case_string (const char *); unsigned hsh_hash_int (int); unsigned hsh_hash_double (double); diff --git a/src/lexer.c b/src/lexer.c index 1aa5454b..575e917b 100644 --- a/src/lexer.c +++ b/src/lexer.c @@ -51,7 +51,7 @@ double tokval; char tokid[LONG_NAME_LEN + 1]; /* T_ID, T_STRING: token string value. - For T_ID, this is not truncated to SHORT_NAME_LEN characters as is tokid. */ + For T_ID, this is not truncated as is tokid. */ struct string tokstr; /* Static variables. */ @@ -116,8 +116,7 @@ restore_token (void) assert (put_token != 0); token = put_token; ds_replace (&tokstr, ds_c_str (&put_tokstr)); - strncpy (tokid, ds_c_str (&put_tokstr), SHORT_NAME_LEN); - tokid[SHORT_NAME_LEN] = 0; + st_trim_copy (tokid, ds_c_str (&tokstr), sizeof tokid); tokval = put_tokval; put_token = 0; } @@ -137,8 +136,6 @@ save_token (void) void lex_get (void) { - int i; - /* If a token was pushed ahead, return it. 
*/ if (put_token) { @@ -360,14 +357,10 @@ lex_get (void) while (CHAR_IS_IDN (*prog)) ds_putc (&tokstr, *prog++); - /* Copy tokstr to tokid, truncating it to LONG_NAME_LEN characters.*/ - strncpy (tokid, ds_c_str (&tokstr), LONG_NAME_LEN); - tokid[LONG_NAME_LEN] = 0; - - /* Convert to upper case */ - for ( i = 0 ; i < ds_length(&tokstr) ; ++i ) - tokstr.string[i] = toupper(tokstr.string[i]); + /* Copy tokstr to tokid, possibly truncating it.*/ + st_trim_copy (tokid, ds_c_str (&tokstr), sizeof tokid); + /* Determine token type. */ token = lex_id_to_token (ds_c_str (&tokstr), ds_length (&tokstr)); break; @@ -485,7 +478,8 @@ lex_match (int t) } /* If the current token is the identifier S, skips it and returns - nonzero. + nonzero. The identifier may be abbreviated to its first three + letters. Otherwise, returns zero. */ int lex_match_id (const char *s) @@ -609,7 +603,7 @@ lex_force_id (void) /* Comparing identifiers. */ /* Keywords match if one of the following is true: KW and TOK are - identical (barring differences in case), or TOK is at least 3 + identical (except for differences in case), or TOK is at least 3 characters long and those characters are identical to KW. KW_LEN is the length of KW, TOK_LEN is the length of TOK. */ int @@ -717,11 +711,11 @@ lex_put_back (int t) void lex_put_back_id (const char *id) { + assert (lex_id_to_token (id, strlen (id)) == T_ID); save_token (); token = T_ID; ds_replace (&tokstr, id); - strncpy (tokid, ds_c_str (&tokstr), SHORT_NAME_LEN); - tokid[SHORT_NAME_LEN] = 0; + st_trim_copy (tokid, ds_c_str (&tokstr), sizeof tokid); } /* Weird line processing functions. */ @@ -792,7 +786,7 @@ lex_preprocess_line (void) int quote; /* Remove C-style comments begun by slash-star and terminated by - star-slash or newline. */ + star-slash or newline. 
*/ quote = comment = 0; for (cp = ds_c_str (&getl_buf); *cp; ) { diff --git a/src/list.q b/src/list.q index 3480efb2..3add8de1 100644 --- a/src/list.q +++ b/src/list.q @@ -517,8 +517,9 @@ determine_layout (void) { int column; /* Current column. */ int width; /* Accumulated width. */ + int height; /* Height of vertical names. */ int max_width; /* Page width. */ - + struct list_ext *prc; if (d->class == &html_class) @@ -557,14 +558,19 @@ determine_layout (void) } /* Try layout #2. */ - for (width = cmd.n_variables - 1, column = 0; + for (width = cmd.n_variables - 1, height = 0, column = 0; column < cmd.n_variables && width <= max_width; - column++) - width += cmd.v_variables[column]->print.w; + column++) + { + struct variable *v = cmd.v_variables[column]; + width += v->print.w; + if (strlen (v->name) > height) + height = strlen (v->name); + } /* If it fit then we need to determine how many labels can be written horizontally. */ - if (width <= max_width) + if (width <= max_width && height <= SHORT_NAME_LEN) { #ifndef NDEBUG prc->n_vertical = -1; diff --git a/src/loop.c b/src/loop.c index 784ca623..b6df5c2d 100644 --- a/src/loop.c +++ b/src/loop.c @@ -224,7 +224,7 @@ internal_cmd_loop (void) } } else - name[0] = 0; + name[0] = '\0'; /* Parse IF clause. */ if (lex_match_id ("IF")) @@ -243,7 +243,7 @@ internal_cmd_loop (void) } /* Find variable; create if necessary. 
*/ - if (name[0]) + if (name[0] != '\0') { two->index = dict_lookup_var (default_dict, name); if (!two->index) diff --git a/src/matrix-data.c b/src/matrix-data.c index 4dc3fb18..73d473d1 100644 --- a/src/matrix-data.c +++ b/src/matrix-data.c @@ -218,7 +218,7 @@ cmd_matrix_data (void) int i; for (i = 0; i < nv; i++) - if (!strcmp (v[i], "VARNAME_")) + if (!strcasecmp (v[i], "VARNAME_")) { msg (SE, _("VARNAME_ cannot be explicitly specified on " "VARIABLES.")); @@ -236,7 +236,7 @@ cmd_matrix_data (void) { struct variable *new_var; - if (strcmp (v[i], "ROWTYPE_")) + if (strcasecmp (v[i], "ROWTYPE_")) { new_var = dict_create_var_assert (default_dict, v[i], 0); attach_mxd_aux (new_var, MXD_CONTINUOUS, i); @@ -306,7 +306,8 @@ cmd_matrix_data (void) if (dict_lookup_var (default_dict, tokid) == NULL && (lex_look_ahead () == '.' || lex_look_ahead () == '/')) { - if (!strcmp (tokid, "ROWTYPE_") || !strcmp (tokid, "VARNAME_")) + if (!strcasecmp (tokid, "ROWTYPE_") + || !strcasecmp (tokid, "VARNAME_")) { msg (SE, _("Split variable may not be named ROWTYPE_ " "or VARNAME_.")); @@ -668,7 +669,7 @@ string_to_content_type (char *s, int *collide) }; for (tp = tab; tp->value != -1; tp++) - if (!strcmp (s, tp->string)) + if (!strcasecmp (s, tp->string)) { if (collide) *collide = tp->collide; diff --git a/src/modify-vars.c b/src/modify-vars.c index 265160f7..fee6d59b 100644 --- a/src/modify-vars.c +++ b/src/modify-vars.c @@ -348,7 +348,7 @@ compare_variables_given_ordering (const void *a_, const void *b_, if (ordering->positional) result = a->index < b->index ? 
-1 : a->index > b->index; else - result = strcmp (a->name, b->name); + result = strcasecmp (a->name, b->name); if (!ordering->forward) result = -result; return result; @@ -358,7 +358,7 @@ compare_variables_given_ordering (const void *a_, const void *b_, struct var_renaming { struct variable *var; - char new_name[SHORT_NAME_LEN + 1]; + char new_name[LONG_NAME_LEN + 1]; }; /* A algo_compare_func that compares new_name members in struct @@ -370,7 +370,7 @@ compare_var_renaming_by_new_name (const void *a_, const void *b_, const struct var_renaming *a = a_; const struct var_renaming *b = b_; - return strcmp (a->new_name, b->new_name); + return strcasecmp (a->new_name, b->new_name); } /* Returns true if performing VM on dictionary D would not cause diff --git a/src/pfm-read.c b/src/pfm-read.c index 13953278..766e2140 100644 --- a/src/pfm-read.c +++ b/src/pfm-read.c @@ -520,30 +520,8 @@ read_variables (struct pfm_reader *r, struct dictionary *dict) read_string (r, name); for (j = 0; j < 6; j++) fmt[j] = read_int (r); -#if 0 - /* Weirdly enough, there is no # character in the SPSS portable - character set, so we can't check for it. */ - if (strlen (name) > SHORT_NAME_LEN) - lose ((r, _("position %d: Variable name has %u characters."), - i, strlen (name))); - if ((name[0] < 74 /* A */ || name[0] > 125 /* Z */) - && name[0] != 152 /* @ */) - lose ((r, _("position %d: Variable name begins with invalid " - "character."), i)); - if (name[0] >= 100 /* a */ && name[0] <= 125 /* z */) - { - corrupt_msg (r, _("position %d: Variable name begins with " - "lowercase letter %c."), - i, name[0] - 100 + 'a'); - name[0] -= 26 /* a - A */; - } - /* Verify remaining characters of variable name. 
*/ - for (j = 1; j < (int) strlen (name); j++) - { - int c = name[j]; -#endif - if (!var_is_valid_name (name, false) || *name == '#') + if (!var_is_valid_name (name, false) || *name == '#' || *name == '$') error (r, _("position %d: Invalid variable name `%s'."), name); st_uppercase (name); diff --git a/src/pfm-write.c b/src/pfm-write.c index 8117b5c5..61fb82b2 100644 --- a/src/pfm-write.c +++ b/src/pfm-write.c @@ -64,13 +64,14 @@ struct pfm_var static int buf_write (struct pfm_writer *, const void *, size_t); static int write_header (struct pfm_writer *); static int write_version_data (struct pfm_writer *); -static int write_variables (struct pfm_writer *, const struct dictionary *); +static int write_variables (struct pfm_writer *, struct dictionary *); static int write_value_labels (struct pfm_writer *, const struct dictionary *); /* Writes the dictionary DICT to portable file HANDLE. Returns - nonzero only if successful. */ + nonzero only if successful. DICT will not be modified, except + to assign short names. */ struct pfm_writer * -pfm_open_writer (struct file_handle *fh, const struct dictionary *dict) +pfm_open_writer (struct file_handle *fh, struct dictionary *dict) { struct pfm_writer *w = NULL; size_t i; @@ -368,9 +369,11 @@ write_value (struct pfm_writer *w, union value *v, struct variable *vv) /* Write variable records, and return success. 
*/ static int -write_variables (struct pfm_writer *w, const struct dictionary *dict) +write_variables (struct pfm_writer *w, struct dictionary *dict) { int i; + + dict_assign_short_names (dict); if (!buf_write (w, "4", 1) || !write_int (w, dict_get_var_cnt (dict)) || !write_int (w, 161)) @@ -389,7 +392,7 @@ write_variables (struct pfm_writer *w, const struct dictionary *dict) struct variable *v = dict_get_var (dict, i); if (!buf_write (w, "7", 1) || !write_int (w, v->width) - || !write_string (w, v->name) + || !write_string (w, v->short_name) || !write_format (w, &v->print) || !write_format (w, &v->write)) return 0; @@ -422,7 +425,7 @@ write_value_labels (struct pfm_writer *w, const struct dictionary *dict) if (!buf_write (w, "D", 1) || !write_int (w, 1) - || !write_string (w, v->name) + || !write_string (w, v->short_name) || !write_int (w, val_labs_count (v->val_labs))) return 0; diff --git a/src/pfm-write.h b/src/pfm-write.h index f05b1606..3f82038d 100644 --- a/src/pfm-write.h +++ b/src/pfm-write.h @@ -25,8 +25,7 @@ struct file_handle; struct dictionary; struct ccase; -struct pfm_writer *pfm_open_writer (struct file_handle *, - const struct dictionary *); +struct pfm_writer *pfm_open_writer (struct file_handle *, struct dictionary *); int pfm_write_case (struct pfm_writer *, struct ccase *); void pfm_close_writer (struct pfm_writer *); diff --git a/src/recode.c b/src/recode.c index 98e6dfdd..a8574c99 100644 --- a/src/recode.c +++ b/src/recode.c @@ -412,7 +412,7 @@ cmd_recode (void) rcd->dest = dict_create_var (default_dict, rcd->dest_name, 0); if (!rcd->dest) { - /* FIXME: This can occur if a destname is duplicated. + /* FIXME: This can fail if a destname is duplicated. We could give an error at parse time but I don't care enough. 
*/ rcd->dest = dict_lookup_var_assert (default_dict, rcd->dest_name); diff --git a/src/repeat.c b/src/repeat.c index 6f75a64b..5ca7dd9b 100644 --- a/src/repeat.c +++ b/src/repeat.c @@ -146,7 +146,7 @@ internal_cmd_do_repeat (void) if (!lex_force_id ()) return 0; for (iter = repeat_tab; iter; iter = iter->next) - if (!strcmp (iter->id, tokid)) + if (!strcasecmp (iter->id, tokid)) { msg (SE, _("Identifier %s is given twice."), tokid); return 0; diff --git a/src/sfm-read.c b/src/sfm-read.c index 59fc1840..23abde90 100644 --- a/src/sfm-read.c +++ b/src/sfm-read.c @@ -296,6 +296,7 @@ sfm_open_reader (struct file_handle *fh, struct dictionary **dict, int32 count P; } data; + unsigned long bytes; int skip = 0; @@ -306,6 +307,10 @@ sfm_open_reader (struct file_handle *fh, struct dictionary **dict, bswap_int32 (&data.size); bswap_int32 (&data.count); } + bytes = data.size * data.count; + if (bytes < data.size || bytes < data.count) + lose ((ME, "%s: Record type %d subtype %d too large.", + handle_get_filename (r->fh), rec_type, data.subtype)); switch (data.subtype) { @@ -361,25 +366,69 @@ sfm_open_reader (struct file_handle *fh, struct dictionary **dict, case 13: /* SPSS 12.0 Long variable name map */ { - - char *s; - char *buf = xmalloc(data.size * data.count + 1); - char *tbuf ; - assertive_buf_read (r, buf, data.size * data.count, 0); - buf[data.size * data.count]='\0'; - - s = strtok_r(buf, "\t", &tbuf); - while ( s ) + char *buf, *short_name, *save_ptr; + int idx; + + /* Read data. */ + buf = xmalloc (bytes + 1); + if (!buf_read (r, buf, bytes, 0)) + { + free (buf); + goto error; + } + buf[bytes] = '\0'; + + /* Parse data. 
*/ + for (short_name = strtok_r (buf, "=", &save_ptr), idx = 0; + short_name != NULL; + short_name = strtok_r (NULL, "=", &save_ptr), idx++) { - char *shortname, *longname; - shortname = strsep(&s,"="); - longname = strsep(&s,"="); - - dict_add_longvar_entry(*dict, shortname, longname); - - s = strtok_r(0,"\t", &tbuf); + char *long_name = strtok_r (NULL, "\t", &save_ptr); + struct variable *v; + + /* Validate long name. */ + if (long_name == NULL) + { + msg (MW, _("%s: Trailing garbage in long variable " + "name map."), + handle_get_filename (r->fh)); + break; + } + if (!var_is_valid_name (long_name, false)) + { + msg (MW, _("%s: Long variable mapping to invalid " + "variable name `%s'."), + handle_get_filename (r->fh), long_name); + break; + } + + /* Find variable using short name. */ + v = dict_lookup_var (*dict, short_name); + if (v == NULL) + { + msg (MW, _("%s: Long variable mapping for " + "nonexistent variable %s."), + handle_get_filename (r->fh), short_name); + break; + } + + /* Set long name. + Renaming a variable may clear the short + name, but we want to retain it, so + re-set it explicitly. */ + dict_rename_var (*dict, v, long_name); + var_set_short_name (v, short_name); + + /* For compatability, make sure dictionary + is in long variable name map order. In + the common case, this has no effect, + because the dictionary and the long + variable name map are already in the + same order. */ + dict_reorder_var (*dict, v, idx); } - + + /* Free data. */ free (buf); } break; @@ -791,10 +840,11 @@ read_variables (struct sfm_reader *r, name[j] = 0; /* Create variable. */ - vv = (*var_by_idx)[i] = dict_create_var_from_short (dict, name, sv.type); + vv = (*var_by_idx)[i] = dict_create_var (dict, name, sv.type); if (vv == NULL) lose ((ME, _("%s: Duplicate variable name `%s' within system file."), handle_get_filename (r->fh), name)); + var_set_short_name (vv, vv->name); /* Case reading data. */ nv = sv.type == 0 ? 
1 : DIV_RND_UP (sv.type, sizeof (flt64)); diff --git a/src/sfm-write.c b/src/sfm-write.c index 7a5f10c4..0c8ebf01 100644 --- a/src/sfm-write.c +++ b/src/sfm-write.c @@ -106,15 +106,16 @@ var_flt64_cnt (const struct variable *v) /* Opens the system file designated by file handle FH for writing cases from dictionary D. If COMPRESS is nonzero, the - system file will be compressed. If OMIT_LONGNAMES is nonzero, the + system file will be compressed. If OMIT_LONG_NAMES is nonzero, the long name table will be omitted. No reference to D is retained, so it may be modified or - destroyed at will after this function returns. */ + destroyed at will after this function returns. D is not + modified by this function, except to assign short names. */ struct sfm_writer * sfm_open_writer (struct file_handle *fh, - const struct dictionary *d, int compress, - short omit_longnames) + struct dictionary *d, int compress, + short omit_long_names) { struct sfm_writer *w = NULL; int idx; @@ -162,6 +163,7 @@ sfm_open_writer (struct file_handle *fh, goto error; /* Write basic variable info. */ + dict_assign_short_names (d); for (i = 0; i < dict_get_var_cnt (d); i++) write_variable (w, dict_get_var (d, i)); @@ -181,19 +183,16 @@ sfm_open_writer (struct file_handle *fh, if (!write_rec_7_34 (w)) goto error; - - /* Write variable display info. */ - if ( !write_variable_display_parameters(w, d)) + if (!write_variable_display_parameters (w, d)) goto error; - - if ( ! omit_longnames ) + if (!omit_long_names) { if (!write_longvar_table (w, d)) goto error; } - /* Write record 999. */ + /* Write end-of-headers record. 
*/ { struct { @@ -419,8 +418,7 @@ write_variable (struct sfm_writer *w, struct variable *v) sv.n_missing_values = nm; write_format_spec (&v->print, &sv.print); write_format_spec (&v->write, &sv.write); - memcpy (sv.name, v->name, strlen (v->name)); - memset (&sv.name[strlen (v->name)], ' ', SHORT_NAME_LEN - strlen (v->name)); + st_bare_pad_copy (sv.name, v->short_name, sizeof sv.name); if (!buf_write (w, &sv, sizeof sv)) return 0; @@ -617,40 +615,42 @@ write_variable_display_parameters (struct sfm_writer *w, static int write_longvar_table (struct sfm_writer *w, const struct dictionary *dict) { - char *buf = 0; - int bufsize = 0; - struct - { - int32 rec_type P; - int32 subtype P; - int32 elem_size P; - int32 n_elem P; - } lv_hdr; + { + int32 rec_type P; + int32 subtype P; + int32 elem_size P; + int32 n_elem P; + } + lv_hdr; + + struct string long_name_map; + size_t i; + + ds_init (&long_name_map, 10 * dict_get_var_cnt (dict)); + for (i = 0; i < dict_get_var_cnt (dict); i++) + { + struct variable *v = dict_get_var (dict, i); + + if (i) + ds_putc (&long_name_map, '\t'); + ds_printf (&long_name_map, "%s=%s", v->short_name, v->name); + } lv_hdr.rec_type = 7; lv_hdr.subtype = 13; lv_hdr.elem_size = 1; + lv_hdr.n_elem = ds_length (&long_name_map); - - dict_get_varname_block(dict, &buf, &bufsize); - - if ( bufsize == 0 ) - return 1; - - lv_hdr.n_elem = bufsize ; - - if (!buf_write (w, &lv_hdr, sizeof(lv_hdr) )) - goto error; - - if (!buf_write (w, buf, bufsize)) + if (!buf_write (w, &lv_hdr, sizeof lv_hdr) + || !buf_write (w, ds_data (&long_name_map), ds_length (&long_name_map))) goto error; - free (buf); + ds_destroy (&long_name_map); return 1; error: - free ( buf ) ; + ds_destroy (&long_name_map); return 0; } diff --git a/src/sfm-write.h b/src/sfm-write.h index 6795a348..dfddd4f6 100644 --- a/src/sfm-write.h +++ b/src/sfm-write.h @@ -25,8 +25,7 @@ struct file_handle; struct dictionary; struct ccase; -struct sfm_writer *sfm_open_writer (struct file_handle *, - const 
struct dictionary *, +struct sfm_writer *sfm_open_writer (struct file_handle *, struct dictionary *, int compress, short omit_longnames); int sfm_write_case (struct sfm_writer *, struct ccase *); diff --git a/src/str.c b/src/str.c index 089ace32..0ad0e2f5 100644 --- a/src/str.c +++ b/src/str.c @@ -95,7 +95,7 @@ mm_reverse (void *p, size_t nbytes) HAYSTACK_LEN. Returns a pointer to the needle found. */ char * mm_find_reverse (const char *haystack, size_t haystack_len, - const char *needle, size_t needle_len) + const char *needle, size_t needle_len) { int i; for (i = haystack_len - needle_len; i >= 0; i--) @@ -104,6 +104,26 @@ mm_find_reverse (const char *haystack, size_t haystack_len, return 0; } +/* Compares the SIZE bytes in A to those in B, disregarding case, + and returns a strcmp()-type result. */ +int +mm_case_compare (const void *a_, const void *b_, size_t size) +{ + const unsigned char *a = a_; + const unsigned char *b = b_; + + while (size-- > 0) + { + unsigned char ac = toupper (*a++); + unsigned char bc = toupper (*b++); + + if (ac != bc) + return ac > bc ? 1 : -1; + } + + return 0; +} + /* Compares A of length A_LEN to B of length B_LEN. The shorter string is considered to be padded with spaces to the length of the longer. 
*/ diff --git a/src/str.h b/src/str.h index 48cad16c..c53b2f46 100644 --- a/src/str.h +++ b/src/str.h @@ -120,6 +120,7 @@ long getdelim (char **lineptr, size_t * n, int delimiter, FILE * stream); void mm_reverse (void *, size_t); char *mm_find_reverse (const char *, size_t, const char *, size_t); +int mm_case_compare (const void *, const void *, size_t); int st_compare_pad (const char *, size_t, const char *, size_t); char *st_spaces (int); diff --git a/src/sysfile-info.c b/src/sysfile-info.c index 2b463c21..860d74f2 100644 --- a/src/sysfile-info.c +++ b/src/sysfile-info.c @@ -569,7 +569,7 @@ compare_vectors_by_name (const void *a_, const void *b_) struct vector *a = *pa; struct vector *b = *pb; - return strcmp (a->name, b->name); + return strcasecmp (a->name, b->name); } /* Display a list of vectors. If SORTED is nonzero then they are diff --git a/src/t-test.q b/src/t-test.q index b33c5c28..38ea420e 100644 --- a/src/t-test.q +++ b/src/t-test.q @@ -360,14 +360,6 @@ tts_custom_groups (struct cmd_t_test *cmd UNUSED) lex_match('='); - if (token != T_ALL && - (token != T_ID || dict_lookup_var (default_dict, tokid) == NULL) - ) - { - msg(SE,_("`%s' is not a variable name"),tokid); - return 0; - } - indep_var = parse_variable (); if (!indep_var) { @@ -453,13 +445,6 @@ tts_custom_pairs (struct cmd_t_test *cmd UNUSED) lex_match('='); - if ((token != T_ID || dict_lookup_var (default_dict, tokid) == NULL) - && token != T_ALL) - { - msg(SE,_("`%s' is not a variable name"),tokid); - return 0; - } - n_vars=0; if (!parse_variables (default_dict, &vars, &n_vars, PV_DUPLICATE | PV_NUMERIC | PV_NO_SCRATCH)) diff --git a/src/var-display.c b/src/var-display.c index 73c0c0a8..8243ac62 100644 --- a/src/var-display.c +++ b/src/var-display.c @@ -56,11 +56,19 @@ cmd_variable_alignment (void) align = ALIGN_RIGHT; else if ( lex_match_id("CENTER")) align = ALIGN_CENTRE; - else - return CMD_FAILURE; + else + { + free (v); + return CMD_FAILURE; + } lex_force_match(')'); } + else + { + free 
(v); + return CMD_FAILURE; + } for( i = 0 ; i < nv ; ++i ) v[i]->alignment = align; @@ -134,12 +142,20 @@ cmd_variable_level (void) level = MEASURE_ORDINAL; else if ( lex_match_id("NOMINAL")) level = MEASURE_NOMINAL; - else - return CMD_FAILURE; + else + { + free (v); + return CMD_FAILURE; + } lex_force_match(')'); } - + else + { + free (v); + return CMD_FAILURE; + } + for( i = 0 ; i < nv ; ++i ) v[i]->measure = level ; diff --git a/src/var.h b/src/var.h index 4c1ba9ef..1d36467b 100644 --- a/src/var.h +++ b/src/var.h @@ -35,7 +35,8 @@ enum { NUMERIC, /* A numeric variable. */ - ALPHA /* A string variable. (STRING is pre-empted by lexer.h) */ + ALPHA /* A string variable. + (STRING is pre-empted by lexer.h.) */ }; /* Types of missing values. Order is significant, see @@ -62,16 +63,17 @@ enum /* A variable's dictionary entry. */ struct variable { - char name[SHORT_NAME_LEN + 1]; /* As a string. */ - char *longname; /* Pointer to entry in dictionary's table */ - int index; /* Index into its dictionary's var[]. */ + /* Basic information. */ + char name[LONG_NAME_LEN + 1]; /* Variable name. Mixed case. */ int type; /* NUMERIC or ALPHA. */ - int width; /* Size of string variables in chars. */ int fv, nv; /* Index into `value's, number of values. */ unsigned init : 1; /* 1=VFM must init and possibly reinit. */ unsigned reinit : 1; /* Cases are: 1=reinitialized; 0=left. */ + /* Data for use by containing dictionary. */ + int index; /* Dictionary index. */ + /* Missing values. */ int miss_type; /* One of the MISSING_* constants. */ union value missing[3]; /* User-missing value. */ @@ -84,40 +86,38 @@ struct variable struct val_labs *val_labs; /* Value labels. */ char *label; /* Variable label. */ - - /* GUI display parameters */ + /* GUI display parameters. 
*/ enum measure measure; /* Nominal ordinal or continuous */ int display_width; /* Width of data editor column */ enum alignment alignment; /* Alignment of data in gui */ + /* Short name, used only for system and portable file input + and output. Upper case only. There is no index for short + names. Short names are not necessarily unique. Any + variable may have no short name, indicated by an empty + string. */ + char short_name[SHORT_NAME_LEN + 1]; + /* Per-command info. */ void *aux; void (*aux_dtor) (struct variable *); }; - -/* A tuple containing short names and longnames */ -struct name_table_entry -{ - char *longname; - char *name; -}; - +/* Variable names. */ bool var_is_valid_name (const char *, bool issue_error); int compare_var_names (const void *, const void *, void *); unsigned hash_var_name (const void *, void *); -/* Destroy and free up an nte */ -void free_nte(struct name_table_entry *nte); - - -unsigned hash_long_name (const void *e_, void *aux UNUSED) ; -int compare_long_names(const void *a_, const void *b_, void *aux); - +/* Short names. */ +void var_set_short_name (struct variable *, const char *); +void var_set_short_name_suffix (struct variable *, const char *, int suffix); +void var_clear_short_name (struct variable *); +/* Pointers to `struct variable', by name. */ int compare_var_ptr_names (const void *, const void *, void *); unsigned hash_var_ptr_name (const void *, void *); +/* Variable auxiliary data. */ void *var_attach_aux (struct variable *, void *aux, void (*aux_dtor) (struct variable *)); void var_clear_aux (struct variable *); @@ -139,12 +139,11 @@ const char *dict_class_to_name (enum dict_class dict_class); struct vector { int idx; /* Index for dict_get_vector(). */ - char name[SHORT_NAME_LEN + 1]; /* Name. */ + char name[LONG_NAME_LEN + 1]; /* Name. */ struct variable **var; /* Vector of variables. */ int cnt; /* Number of variables. */ }; - void discard_variables (void); /* This is the active file dictionary. 
*/ diff --git a/src/vars-atr.c b/src/vars-atr.c index a3024d0c..a1b8276f 100644 --- a/src/vars-atr.c +++ b/src/vars-atr.c @@ -256,14 +256,14 @@ var_is_valid_name (const char *name, bool issue_error) if (length < 1) { if (issue_error) - msg (SE, _("Variable names must be at least 1 character long.")); + msg (SE, _("Variable name cannot be empty string.")); return false; } - else if (length > SHORT_NAME_LEN) + else if (length > LONG_NAME_LEN) { if (issue_error) msg (SE, _("Variable name %s exceeds %d-character limit."), - (int) SHORT_NAME_LEN); + (int) LONG_NAME_LEN); return false; } @@ -304,7 +304,7 @@ compare_var_names (const void *a_, const void *b_, void *foo UNUSED) const struct variable *a = a_; const struct variable *b = b_; - return strcmp (a->name, b->name); + return strcasecmp (a->name, b->name); } /* A hsh_hash_func that hashes variable V based on its name. */ @@ -313,7 +313,7 @@ hash_var_name (const void *v_, void *foo UNUSED) { const struct variable *v = v_; - return hsh_hash_string (v->name); + return hsh_hash_case_string (v->name); } /* A hsh_compare_func that orders pointers to variables A and B @@ -324,7 +324,7 @@ compare_var_ptr_names (const void *a_, const void *b_, void *foo UNUSED) struct variable *const *a = a_; struct variable *const *b = b_; - return strcmp ((*a)->name, (*b)->name); + return strcasecmp ((*a)->name, (*b)->name); } /* A hsh_hash_func that hashes pointer to variable V based on its @@ -334,5 +334,66 @@ hash_var_ptr_name (const void *v_, void *foo UNUSED) { struct variable *const *v = v_; - return hsh_hash_string ((*v)->name); + return hsh_hash_case_string ((*v)->name); +} + +/* Sets V's short_name to SHORT_NAME, truncating it to + SHORT_NAME_LEN characters and converting it to uppercase in + the process. 
*/ +void +var_set_short_name (struct variable *v, const char *short_name) +{ + assert (v != NULL); + assert (short_name[0] == '\0' || var_is_valid_name (short_name, false)); + + st_trim_copy (v->short_name, short_name, sizeof v->short_name); + st_uppercase (v->short_name); +} + +/* Clears V's short name. */ +void +var_clear_short_name (struct variable *v) +{ + assert (v != NULL); + + v->short_name[0] = '\0'; +} + +/* Sets V's short name to BASE, followed by a suffix of the form + _A, _B, _C, ..., _AA, _AB, etc. according to the value of + SUFFIX. Truncates BASE as necessary to fit. */ +void +var_set_short_name_suffix (struct variable *v, const char *base, int suffix) +{ + char string[SHORT_NAME_LEN + 1]; + char *start, *end; + int len, ofs; + + assert (v != NULL); + assert (suffix >= 0); + assert (strlen (v->short_name) > 0); + + /* Set base name. */ + var_set_short_name (v, base); + + /* Compose suffix_string. */ + start = end = string + sizeof string - 1; + *end = '\0'; + do + { + *--start = "ABCDEFGHIJKLMNOPQRSTUVWXYZ"[suffix % 26]; + if (start <= string + 1) + msg (SE, _("Variable suffix too large.")); + suffix /= 26; + } + while (suffix > 0); + *--start = '_'; + + /* Append suffix_string to V's short name. 
*/ + len = end - start; + if (len + strlen (v->short_name) > SHORT_NAME_LEN) + ofs = SHORT_NAME_LEN - len; + else + ofs = strlen (v->short_name); + strcpy (v->short_name + ofs, start); } diff --git a/src/vars-prs.c b/src/vars-prs.c index 2a5309f7..23f37010 100644 --- a/src/vars-prs.c +++ b/src/vars-prs.c @@ -134,7 +134,6 @@ parse_variables (const struct dictionary *d, struct variable ***var, assert (var != NULL); assert (cnt != NULL); - vs = var_set_create_from_dict (d); success = parse_var_set_vars (vs, var, cnt, opts); if ( success == 0 ) @@ -267,7 +266,7 @@ parse_var_set_vars (const struct var_set *vs, else included = NULL; -if (lex_match (T_ALL)) + if (lex_match (T_ALL)) add_variables (v, nv, &mv, included, pv_opts, vs, 0, var_set_get_cnt (vs) - 1, DC_ORDINARY); else @@ -303,6 +302,7 @@ if (lex_match (T_ALL)) first_var->name, last_var->name); goto fail; } + if (class != last_class) { msg (SE, _("When using the TO keyword to specify several " @@ -391,8 +391,8 @@ parse_DATA_LIST_vars (char ***names, int *nnames, int pv_opts) int d1, d2; int n; int nvar, mvar; - char *name1, *name2; - char *root1, *root2; + char name1[LONG_NAME_LEN + 1], name2[LONG_NAME_LEN + 1]; + char root1[LONG_NAME_LEN + 1], root2[LONG_NAME_LEN + 1]; int success = 0; assert (names != NULL); @@ -409,10 +409,6 @@ parse_DATA_LIST_vars (char ***names, int *nnames, int pv_opts) *names = NULL; } - name1 = xmalloc (4 * (SHORT_NAME_LEN + 1)); - name2 = &name1[1 * SHORT_NAME_LEN + 1]; - root1 = &name1[2 * SHORT_NAME_LEN + 1]; - root2 = &name1[3 * SHORT_NAME_LEN + 1]; do { if (token != T_ID) @@ -443,7 +439,7 @@ parse_DATA_LIST_vars (char ***names, int *nnames, int pv_opts) || !extract_num (name2, root2, &n2, &d2)) goto fail; - if (strcmp (root1, root2)) + if (strcasecmp (root1, root2)) { msg (SE, _("Prefixes don't match in use of TO convention.")); goto fail; @@ -464,8 +460,9 @@ parse_DATA_LIST_vars (char ***names, int *nnames, int pv_opts) for (n = n1; n <= n2; n++) { - (*names)[nvar] = xmalloc 
(SHORT_NAME_LEN + 1); - sprintf ((*names)[nvar], "%s%0*d", root1, d1, n); + char name[LONG_NAME_LEN + 1]; + sprintf (name, "%s%0*d", root1, d1, n); + (*names)[nvar] = xstrdup (name); nvar++; } } @@ -489,7 +486,6 @@ parse_DATA_LIST_vars (char ***names, int *nnames, int pv_opts) fail: *nnames = nvar; - free (name1); if (!success) { int i; @@ -662,7 +658,6 @@ struct array_var_set struct variable *const *var;/* Array of variables. */ size_t var_cnt; /* Number of elements in var. */ struct hsh_table *name_tab; /* Hash from variable names to variables. */ - struct hsh_table *longname_tab; /* Hash of short names indexed by long names */ }; /* Returns the number of variables in VS. */ @@ -689,26 +684,10 @@ array_var_set_get_var (const struct var_set *vs, size_t idx) static int array_var_set_lookup_var_idx (const struct var_set *vs, const char *name) { - char *short_name ; struct array_var_set *avs = vs->aux; struct variable v, *vp, *const *vpp; - struct name_table_entry key; - key.longname = name; - - struct name_table_entry *nte; - - assert (avs->longname_tab); - - - nte = hsh_find (avs->longname_tab, &key); - - if (!nte) - return -1; - - short_name = nte->name; - - strcpy (v.name, short_name); + strcpy (v.name, name); vp = &v; vpp = hsh_find (avs->name_tab, &vp); return vpp != NULL ? 
vpp - avs->var : -1; @@ -721,7 +700,6 @@ array_var_set_destroy (struct var_set *vs) struct array_var_set *avs = vs->aux; hsh_destroy (avs->name_tab); - hsh_destroy (avs->longname_tab); free (avs); free (vs); } @@ -744,36 +722,14 @@ var_set_create_from_array (struct variable *const *var, size_t var_cnt) avs->var = var; avs->var_cnt = var_cnt; avs->name_tab = hsh_create (2 * var_cnt, - compare_var_ptr_names, hash_var_ptr_name, - NULL, NULL); - - avs->longname_tab = hsh_create (2 * var_cnt, - compare_long_names, hash_long_name, - (hsh_free_func *) free_nte, - NULL); - + compare_var_ptr_names, hash_var_ptr_name, NULL, + NULL); for (i = 0; i < var_cnt; i++) - { - struct name_table_entry *nte ; - - if (hsh_insert (avs->name_tab, &var[i]) != NULL) - { - var_set_destroy (vs); - return NULL; - } - - nte = xmalloc (sizeof (*nte)); - nte->name = strdup(var[i]->name); - nte->longname = strdup(var[i]->longname); - - if (hsh_insert (avs->longname_tab, nte) != NULL) - { - var_set_destroy (vs); - free (nte); - return NULL; - } - - } - + if (hsh_insert (avs->name_tab, (void *) &var[i]) != NULL) + { + var_set_destroy (vs); + return NULL; + } + return vs; } diff --git a/src/vector.c b/src/vector.c index b6149612..558db4e0 100644 --- a/src/vector.c +++ b/src/vector.c @@ -62,7 +62,7 @@ cmd_vector (void) } for (cp2 = cp; cp2 < cp; cp2 += strlen (cp)) - if (!strcmp (cp2, tokid)) + if (!strcasecmp (cp2, tokid)) { msg (SE, _("Vector name %s is given twice."), tokid); goto fail; @@ -92,8 +92,8 @@ cmd_vector (void) { /* There's more than one vector name. */ msg (SE, _("A slash must be used to separate each vector " - "specification when using the long form. Commands " - "such as VECTOR A,B=Q1 TO Q20 are not supported.")); + "specification when using the long form. 
Commands " + "such as VECTOR A,B=Q1 TO Q20 are not supported.")); goto fail; } @@ -131,13 +131,13 @@ cmd_vector (void) if (!lex_force_match (')')) goto fail; - /* First check that all the generated variable names are SHORT_NAME_LEN - characters or shorter. */ + /* First check that all the generated variable names + are LONG_NAME_LEN characters or shorter. */ ndig = intlog10 (nv); for (cp = vecnames; *cp;) { int len = strlen (cp); - if (len + ndig > SHORT_NAME_LEN) + if (len + ndig > LONG_NAME_LEN) { msg (SE, _("%s%d is too long for a variable name."), cp, nv); goto fail; @@ -153,7 +153,8 @@ cmd_vector (void) sprintf (name, "%s%d", cp, i + 1); if (dict_lookup_var (default_dict, name)) { - msg (SE, _("There is already a variable named %s."), name); + msg (SE, _("There is already a variable named %s."), + name); goto fail; } } diff --git a/tests/ChangeLog b/tests/ChangeLog index bb353ca4..836eda2e 100644 --- a/tests/ChangeLog +++ b/tests/ChangeLog @@ -1,3 +1,8 @@ +Sun May 1 23:18:37 2005 Ben Pfaff + + Most tests: changed capitalization of variable names in + definitions or in output, because now we preserve it. + Mon Apr 25 23:30:17 2005 Ben Pfaff * commands/match-files.sh: New test. diff --git a/tests/bugs/agg-crash-2.sh b/tests/bugs/agg-crash-2.sh index 01aa218b..ff2a1e5d 100755 --- a/tests/bugs/agg-crash-2.sh +++ b/tests/bugs/agg-crash-2.sh @@ -49,7 +49,7 @@ cd $TEMPDIR activity="create program" cat > $TESTFILE < $TESTFILE < $TESTFILE < $TESTFILE < $TESTFILE < $TESTFILE < agg-skel.pspp < $TESTFILE < 4 asdfk 1 3 3 end data. -autorecode x y into a b/descend. +autorecode x y into A B/descend. list. /* Just to make sure it works on second & subsequent executions, /* try it again. -compute z=trunc(y/2). -autorecode z into w. +compute Z=trunc(y/2). +autorecode z into W. list. EOF if [ $? 
-ne 0 ] ; then no_result ; fi diff --git a/tests/command/count.sh b/tests/command/count.sh index a792c08b..641747b7 100755 --- a/tests/command/count.sh +++ b/tests/command/count.sh @@ -50,7 +50,7 @@ cat > $TESTFILE < $TESTFILE < $TESTFILE < $TESTFILE < $TESTFILE << EOF /* Set up a dummy active file in memory. -data list /x 1 y 2. +data list /X 1 Y 2. begin data. 16 27 diff --git a/tests/command/filter.sh b/tests/command/filter.sh index 905d21ae..59cf5b3b 100755 --- a/tests/command/filter.sh +++ b/tests/command/filter.sh @@ -48,7 +48,7 @@ cd $TEMPDIR activity="create program" cat > $TESTFILE << EOF -data list notable /x 1-2. +data list notable /X 1-2. begin data. 1 2 @@ -61,7 +61,7 @@ begin data. 9 10 end data. -compute filter_$ = mod(x,2). +compute FILTER_$ = mod(x,2). filter by filter_$. list. diff --git a/tests/command/flip.sh b/tests/command/flip.sh index 8b95ec77..d8e543ea 100755 --- a/tests/command/flip.sh +++ b/tests/command/flip.sh @@ -48,7 +48,7 @@ cd $TEMPDIR activity="create flip.stat" cat > $TEMPDIR/flip.stat < $TESTFILE < $TEMPDIR/lag.stat < $TEMPDIR/loop.stat < $TESTFILE < $TESTFILE < $TEMPDIR/sample.stat < $TEMPDIR/split.stat < $TESTFILE < $TESTFILE < $TESTFILE < $TESTFILE < $TEMPDIR/tabs.stat < $TESTFILE < $TEMPDIR/filter.stat << EOF -data list notable /x 1-2. +data list notable /X 1-2. begin data. 1 2 diff --git a/tests/stats/descript-basic.sh b/tests/stats/descript-basic.sh index eaa39fcc..e904eeec 100755 --- a/tests/stats/descript-basic.sh +++ b/tests/stats/descript-basic.sh @@ -49,7 +49,7 @@ activity="create program" cat > $TEMPDIR/descript.stat < $TEMPDIR/descript.stat < $TEMPDIR/descript.stat < $TEMPDIR/prog.sps < $TEMPDIR/prog.sps < $TEMPDIR/prog.sps < $TEMPDIR/prog.sps < $TEMPDIR/prog.sps <