From 5a0b0d607efde2ab3a47d0d9c9fc62128a3156c1 Mon Sep 17 00:00:00 2001 From: Ben Pfaff Date: Sat, 11 Oct 2008 15:35:42 -0700 Subject: [PATCH] Implement variable and data file attributes. --- NEWS | 10 +- doc/data-io.texi | 47 +++ doc/dev/system-file-format.texi | 80 ++++ doc/files.texi | 37 +- doc/language.texi | 5 + doc/variables.texi | 106 ++++- src/data/attributes.c | 298 +++++++++++++ src/data/attributes.h | 70 ++++ src/data/automake.mk | 2 + src/data/dictionary.c | 49 ++- src/data/dictionary.h | 5 + src/data/sys-file-reader.c | 293 +++++++++---- src/data/sys-file-writer.c | 76 ++++ src/data/variable.c | 62 ++- src/data/variable.h | 7 + src/language/command.def | 3 +- src/language/dictionary/apply-dictionary.c | 7 + src/language/dictionary/attributes.c | 200 +++++++++ src/language/dictionary/automake.mk | 1 + src/language/dictionary/sys-file-info.c | 463 ++++++++++++--------- tests/automake.mk | 1 + tests/command/attributes.sh | 148 +++++++ tests/dissect-sysfile.c | 267 ++++++++---- 23 files changed, 1840 insertions(+), 397 deletions(-) create mode 100644 src/data/attributes.c create mode 100644 src/data/attributes.h create mode 100644 src/language/dictionary/attributes.c create mode 100755 tests/command/attributes.sh diff --git a/NEWS b/NEWS index bff9a787..1070638b 100644 --- a/NEWS +++ b/NEWS @@ -1,10 +1,18 @@ PSPP NEWS -- history of user-visible changes. -Time-stamp: <2008-10-09 21:32:07 blp> +Time-stamp: <2008-10-11 10:04:51 blp> Copyright (C) 1996-9, 2000, 2008 Free Software Foundation, Inc. See the end for copying conditions. Please send PSPP bug reports to bug-gnu-pspp@gnu.org. +Changes from 0.6.1 to 0.7.0: + + * Custom variable and data file attributes are now supported. + Commands VARIABLE ATTRIBUTE and DATAFILE ATTRIBUTE have been added + for setting and clearing attributes. Support for attributes has also + been added to commands that read and write system files, such as + SAVE and GET, as well as to the DISPLAY command. 
+ Changes from 0.6.0 to 0.6.1: * Statistical bug fixes: diff --git a/doc/data-io.texi b/doc/data-io.texi index b6a3a6d2..335d6c46 100644 --- a/doc/data-io.texi +++ b/doc/data-io.texi @@ -25,6 +25,7 @@ actually be read until a procedure is executed. @menu * BEGIN DATA:: Embed data within a syntax file. * CLOSE FILE HANDLE:: Close a file handle. +* DATAFILE ATTRIBUTE:: Set custom attributes on data files. * DATA LIST:: Fundamental data reading command. * END CASE:: Output the current case. * END FILE:: Terminate the current input program. @@ -89,6 +90,52 @@ DATA} and @cmd{END DATA}, cannot be closed. Attempts to close it with @cmd{CLOSE FILE HANDLE} is a PSPP extension. +@node DATAFILE ATTRIBUTE +@section DATAFILE ATTRIBUTE +@vindex DATAFILE ATTRIBUTE + +@display +DATAFILE ATTRIBUTE + ATTRIBUTE=name('value') [name('value')]@dots{} + ATTRIBUTE=name@b{[}index@b{]}('value') [name@b{[}index@b{]}('value')]@dots{} + DELETE=name [name]@dots{} + DELETE=name@b{[}index@b{]} [name@b{[}index@b{]}]@dots{} +@end display + +@cmd{DATAFILE ATTRIBUTE} adds, modifies, or removes user-defined +attributes associated with the active file. Custom data file +attributes are not interpreted by PSPP, but they are saved as part of +system files and may be used by other software that reads them. + +Use the ATTRIBUTE subcommand to add or modify a custom data file +attribute. Specify the name of the attribute as an identifier +(@pxref{Tokens}), followed by the desired value, in parentheses, as a +quoted string. Attribute names that begin with @code{$} are reserved +for PSPP's internal use, and attribute names that begin with @code{@@} +or @code{$@@} are not displayed by most PSPP commands that display +other attributes. Other attribute names are not treated specially. + +Attributes may also be organized into arrays. To assign to an array +element, add an integer array index enclosed in square brackets +(@code{[} and @code{]}) between the attribute name and value. Array +indexes start at 1, not 0. 
An attribute array that has a single +element (number 1) is not distinguished from a non-array attribute. + +Use the DELETE subcommand to delete an attribute. Specify an +attribute name by itself to delete an entire attribute, including all +array elements for attribute arrays. Specify an attribute name +followed by an array index in square brackets to delete a single +element of an attribute array. In the latter case, all the array +elements numbered higher than the deleted element are shifted down, +filling the vacated position. + +To associate custom attributes with particular variables, instead of +with the entire active file, use @cmd{VARIABLE ATTRIBUTE} instead. + +@cmd{DATAFILE ATTRIBUTE} takes effect immediately. It is not affected +by conditional and looping structures such as @cmd{DO IF} or +@cmd{LOOP}. + @node DATA LIST @section DATA LIST @vindex DATA LIST diff --git a/doc/dev/system-file-format.texi b/doc/dev/system-file-format.texi index 70fa385c..3e764c8c 100644 --- a/doc/dev/system-file-format.texi +++ b/doc/dev/system-file-format.texi @@ -96,6 +96,7 @@ Each type of record is described separately below. * Variable Display Parameter Record:: * Long Variable Names Record:: * Very Long String Record:: +* Data File and Variable Attributes Records:: * Miscellaneous Informational Records:: * Dictionary Termination Record:: * Data Record:: @@ -791,6 +792,85 @@ After the last tuple, there may be a single byte 00, or @{00, 09@}. The total length is @code{count} bytes. @end table +@node Data File and Variable Attributes Records +@section Data File and Variable Attributes Records + +The data file and variable attributes records represent custom +attributes for the system file or for individual variables in the +system file, as defined on the DATAFILE ATTRIBUTE (@pxref{DATAFILE +ATTRIBUTE,,,pspp, PSPP Users Guide}) and VARIABLE ATTRIBUTE commands +(@pxref{VARIABLE ATTRIBUTE,,,pspp, PSPP Users Guide}), respectively. 
+ +@example +/* @r{Header.} */ +int32 rec_type; +int32 subtype; +int32 size; +int32 count; + +/* @r{Exactly @code{count} bytes of data.} */ +char attributes[]; +@end example + +@table @code +@item int32 rec_type; +Record type. Always set to 7. + +@item int32 subtype; +Record subtype. Always set to 17 for a data file attribute record or +to 18 for a variable attributes record. + +@item int32 size; +The size of each element in the @code{attributes} member. Always set to 1. + +@item int32 count; +The total number of bytes in @code{attributes}. + +@item char attributes[]; +The attributes, in a text-based format. + +In record subtype 17, this field contains a single attribute set. An +attribute set is a sequence of one or more attributes concatenated +together. Each attribute consists of a name, which has the same +syntax as a variable name, followed by, inside parentheses, a sequence +of one or more values. Each value consists of a string enclosed in +single quotes (@code{'}) followed by a line feed (byte 0x0a). A value +may contain single quote characters, which are not themselves escaped +or quoted or required to be present in pairs. There is no apparent +way to embed a line feed in a value. There is no distinction between +an attribute with a single value and an attribute array with one +element. + +In record subtype 18, this field contains a sequence of one or more +variable attribute sets. If more than one variable attribute set is +present, each one after the first is delimited from the previous by +@code{/}. Each variable attribute set consists of a variable name, +followed by @code{:}, followed by an attribute set with the same +syntax as on record subtype 17. + +The total length is @code{count} bytes. +@end table + +@subheading Example + +A system file produced with the following VARIABLE ATTRIBUTE commands +in effect: + +@example +VARIABLE ATTRIBUTE VARIABLES=dummy ATTRIBUTE=fred[1]('23') fred[2]('34'). +VARIABLE ATTRIBUTE VARIABLES=dummy ATTRIBUTE=bert('123'). 
+@end example + +@noindent +will contain a variable attribute record with the following contents: + +@example +00000000 07 00 00 00 12 00 00 00 01 00 00 00 22 00 00 00 |............"...| +00000010 64 75 6d 6d 79 3a 66 72 65 64 28 27 32 33 27 0a |dummy:fred('23'.| +00000020 27 33 34 27 0a 29 62 65 72 74 28 27 31 32 33 27 |'34'.)bert('123'| +00000030 0a 29 |.) | +@end example + @node Miscellaneous Informational Records @section Miscellaneous Informational Records diff --git a/doc/files.texi b/doc/files.texi index 30a023ae..ae10ec7a 100644 --- a/doc/files.texi +++ b/doc/files.texi @@ -39,25 +39,46 @@ Only variables with names that exist in both the active file and the system file are considered. Variables with the same name but different types (numeric, string) will cause an error message. Otherwise, the system file variables' attributes will replace those in their matching -active file variables, as described below. +active file variables: +@itemize @bullet +@item If a system file variable has a variable label, then it will replace the active file variable's variable label. If the system file variable does not have a variable label, then the active file variable's variable -label, if any, will be retained. +label, if any, will be retained. + +@item +If the system file variable has custom attributes (@pxref{VARIABLE +ATTRIBUTE}), then those attributes replace the active file variable's +custom attributes. If the system file variable does not have custom +attributes, then the active file variable's custom attributes, if any, +will be retained. +@item If the active file variable is numeric or short string, then value labels and missing values, if any, will be copied to the active file variable. If the system file variable does not have value labels or missing values, then those in the active file variable, if any, will not be disturbed. +@end itemize -Finally, weighting of the active file is updated (@pxref{WEIGHT}). 
If -the active file has a weighting variable, and the system file does not, -or if the weighting variable in the system file does not exist in the -active file, then the active file weighting variable, if any, is -retained. Otherwise, the weighting variable in the system file becomes -the active file weighting variable. +In addition to properties of variables, some properties of the active +file dictionary as a whole are updated: + +@itemize @bullet +@item +If the system file has custom attributes (@pxref{DATAFILE ATTRIBUTE}), +then those attributes replace the active file's custom +attributes. + +@item +If the active file has a weighting variable (@pxref{WEIGHT}), and the +system file does not, or if the weighting variable in the system file +does not exist in the active file, then the active file weighting +variable, if any, is retained. Otherwise, the weighting variable in +the system file becomes the active file weighting variable. +@end itemize @cmd{APPLY DICTIONARY} takes effect immediately. It does not read the active diff --git a/doc/language.texi b/doc/language.texi index 13454336..ed1123db 100644 --- a/doc/language.texi +++ b/doc/language.texi @@ -497,6 +497,11 @@ they are displayed. Example: a width of 8, with 2 decimal places. @item Write format Similar to print format, but used by the @cmd{WRITE} command (@pxref{WRITE}). + +@cindex custom attributes +@item Custom attributes +User-defined associations between names and values. @xref{VARIABLE +ATTRIBUTE}. @end table @node System Variables diff --git a/doc/variables.texi b/doc/variables.texi index a66e4232..95a5b67b 100644 --- a/doc/variables.texi +++ b/doc/variables.texi @@ -7,8 +7,7 @@ several utility functions for examining and adjusting them. @menu * ADD VALUE LABELS:: Add value labels to variables. * DELETE VARIABLES:: Delete variables. -* DISPLAY:: Display variable names & descriptions. -* DISPLAY VECTORS:: Display a list of vectors. +* DISPLAY:: Display information about the active file. 
* FORMATS:: Set print and write formats. * LEAVE:: Don't clear variables between cases. * MISSING VALUES:: Set missing values for variables. @@ -18,6 +17,7 @@ several utility functions for examining and adjusting them. * RENAME VARIABLES:: Rename variables. * VALUE LABELS:: Set value labels for variables. * STRING:: Create new string variables. +* VARIABLE ATTRIBUTE:: Set custom attributes on variables. * VARIABLE LABELS:: Set variable labels for variables. * VARIABLE ALIGNMENT:: Set the alignment for display. * VARIABLE WIDTH:: Set the display width. @@ -61,15 +61,26 @@ effect, it causes the temporary transformations to become permanent. @vindex DISPLAY @display -DISPLAY @{NAMES,INDEX,LABELS,VARIABLES,DICTIONARY,SCRATCH@} - [SORTED] [var_list] +DISPLAY [SORTED] NAMES [[/VARIABLES=]var_list]. +DISPLAY [SORTED] INDEX [[/VARIABLES=]var_list]. +DISPLAY [SORTED] LABELS [[/VARIABLES=]var_list]. +DISPLAY [SORTED] VARIABLES [[/VARIABLES=]var_list]. +DISPLAY [SORTED] DICTIONARY [[/VARIABLES=]var_list]. +DISPLAY [SORTED] SCRATCH [[/VARIABLES=]var_list]. +DISPLAY [SORTED] ATTRIBUTES [[/VARIABLES=]var_list]. +DISPLAY [SORTED] @@ATTRIBUTES [[/VARIABLES=]var_list]. +DISPLAY [SORTED] VECTORS. @end display -@cmd{DISPLAY} displays requested information on variables. Variables can -optionally be sorted alphabetically. The entire dictionary or just -specified variables can be described. +@cmd{DISPLAY} displays information about the active file. A variety +of different forms of information can be requested. -One of the following keywords can be present: +The following keywords primarily cause information about variables to +be displayed. With these keywords, by default information is +displayed about all variables in the active file, in the order that +variables occur in the active file dictionary. The SORTED keyword +causes output to be sorted alphabetically by variable name. The +VARIABLES subcommand limits output to the specified variables. 
@table @asis @item NAMES @@ -91,23 +102,24 @@ Variable names, positions, print and write formats, missing values, variable labels, and value labels are displayed. @item SCRATCH -Varible names are displayed, for scratch variables only (@pxref{Scratch +Variable names are displayed, for scratch variables only (@pxref{Scratch Variables}). -@end table -If SORTED is specified, then the variables are displayed in ascending -order based on their names; otherwise, they are displayed in the order -that they occur in the active file dictionary. +@item ATTRIBUTES +Datafile and variable attributes are displayed, except that attributes +whose names begin with @code{@@} or @code{$@@} are omitted. -@node DISPLAY VECTORS -@section DISPLAY VECTORS -@vindex DISPLAY VECTORS +@itemx @@ATTRIBUTES +All datafile and variable attributes are displayed. +@end table -@display -DISPLAY VECTORS. -@end display +With the @code{VECTORS} keyword, @cmd{DISPLAY} lists all the currently +declared vectors. If the SORTED keyword is given, the vectors are +listed in alphabetical order; otherwise, they are listed in textual +order of definition within the PSPP syntax file. -@cmd{DISPLAY VECTORS} lists all the currently declared vectors. +For related commands, see @ref{DISPLAY DOCUMENTS} and @ref{DISPLAY +FILE LABEL}. @node FORMATS @section FORMATS @@ -357,6 +369,60 @@ implicitly derived from the specified output formats. Created variables are initialized to spaces. +@node VARIABLE ATTRIBUTE +@section VARIABLE ATTRIBUTE +@vindex VARIABLE ATTRIBUTE + +@display +VARIABLE ATTRIBUTE + VARIABLES=var_list + ATTRIBUTE=name('value') [name('value')]@dots{} + ATTRIBUTE=name@b{[}index@b{]}('value') [name@b{[}index@b{]}('value')]@dots{} + DELETE=name [name]@dots{} + DELETE=name@b{[}index@b{]} [name@b{[}index@b{]}]@dots{} +@end display + +@cmd{VARIABLE ATTRIBUTE} adds, modifies, or removes user-defined +attributes associated with variables in the active file. 
Custom +variable attributes are not interpreted by PSPP, but they are saved as +part of system files and may be used by other software that reads +them. + +The required VARIABLES subcommand must come first. Specify the +variables to which the following ATTRIBUTE or DELETE subcommand +should apply. + +Use the ATTRIBUTE subcommand to add or modify custom variable +attributes. Specify the name of the attribute as an identifier +(@pxref{Tokens}), followed by the desired value, in parentheses, as a +quoted string. The specified attributes are then added or modified in +the variables specified on VARIABLES. Attribute names that begin with +@code{$} are reserved for PSPP's internal use, and attribute names +that begin with @code{@@} or @code{$@@} are not displayed by most PSPP +commands that display other attributes. Other attribute names are not +treated specially. + +Attributes may also be organized into arrays. To assign to an array +element, add an integer array index enclosed in square brackets +(@code{[} and @code{]}) between the attribute name and value. Array +indexes start at 1, not 0. An attribute array that has a single +element (number 1) is not distinguished from a non-array attribute. + +Use the DELETE subcommand to delete an attribute from the variable +specified on VARIABLES. Specify an attribute name by itself to delete +an entire attribute, including all array elements for attribute +arrays. Specify an attribute name followed by an array index in +square brackets to delete a single element of an attribute array. In +the latter case, all the array elements numbered higher than the +deleted element are shifted down, filling the vacated position. + +To associate custom attributes with the entire active file, instead of +with particular variables, use @cmd{DATAFILE ATTRIBUTE} instead. + +@cmd{VARIABLE ATTRIBUTE} takes effect immediately. It is not affected +by conditional and looping structures such as @cmd{DO IF} or +@cmd{LOOP}. 
+ @node VARIABLE LABELS @section VARIABLE LABELS @vindex VARIABLE LABELS diff --git a/src/data/attributes.c b/src/data/attributes.c new file mode 100644 index 00000000..aa128290 --- /dev/null +++ b/src/data/attributes.c @@ -0,0 +1,298 @@ +/* PSPP - a program for statistical analysis. + Copyright (C) 2008 Free Software Foundation, Inc. + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see . */ + +#include + +#include +#include +#include +#include +#include +#include "xalloc.h" + +/* A custom attribute of the sort maintained by the DATAFILE + ATTRIBUTE and VARIABLE ATTRIBUTE commands. + + Attributes have a name (the rules for which are the same as + those for PSPP variable names) and one or more values, each of + which is a string. An attribute may be part of one attribute + set. */ +struct attribute + { + struct hmap_node node; /* Used by attrset. */ + char *name; /* Name. */ + char **values; /* Each value. */ + size_t n_values; /* Number of values. */ + size_t allocated_values; /* Amount of allocated space for values. */ + }; + +/* Creates and returns a new attribute with the given NAME. The + attribute initially has no values. (Attributes with no values + cannot be saved to system files, so at least one value should + be added before the attribute is made available to the PSPP + user.) 
*/ +struct attribute * +attribute_create (const char *name) +{ + struct attribute *attr = xmalloc (sizeof *attr); + attr->name = xstrdup (name); + attr->values = NULL; + attr->n_values = 0; + attr->allocated_values = 0; + return attr; +} + +/* Creates and returns a new attribute with the same name and + values as ORIG. */ +struct attribute * +attribute_clone (const struct attribute *orig) +{ + struct attribute *attr; + size_t i; + + attr = attribute_create (orig->name); + for (i = 0; i < orig->n_values; i++) + attribute_add_value (attr, orig->values[i]); + return attr; +} + +/* Destroys ATTR and frees all associated memory. + + This function must not be called if ATTR is part of an + attribute set. Use attrset_delete() instead. */ +void +attribute_destroy (struct attribute *attr) +{ + if (attr != NULL) + { + size_t i; + + for (i = 0; i < attr->n_values; i++) + free (attr->values[i]); + free (attr->values); + free (attr->name); + free (attr); + } +} + +/* Returns the name of ATTR. The caller must not free or modify + the returned string. */ +const char * +attribute_get_name (const struct attribute *attr) +{ + return attr->name; +} + +/* Returns ATTR's value with the given INDEX, or a null pointer + if INDEX is greater than or equal to the number of values in + ATTR (that is, values are numbered starting from 0). The + caller must not free or modify the returned string. */ +const char * +attribute_get_value (const struct attribute *attr, size_t index) +{ + return index < attr->n_values ? attr->values[index] : NULL; +} + +/* Returns ATTR's number of values. */ +size_t +attribute_get_n_values (const struct attribute *attrs) +{ + return attrs->n_values; +} + +/* Adds a copy of VALUE as a new value to ATTR. The caller + retains ownership of VALUE. 
*/ +void +attribute_add_value (struct attribute *attr, const char *value) +{ + if (attr->n_values >= attr->allocated_values) + attr->values = x2nrealloc (attr->values, &attr->allocated_values, + sizeof *attr->values); + attr->values[attr->n_values++] = xstrdup (value); +} + +/* Adds or replaces the value with the given INDEX in ATTR by a + copy of VALUE. The caller retains ownership of VALUE. + + If INDEX is an existing value index, that value is replaced. + If no value index numbered INDEX exists in ATTR, then it is + added, and any values intermediate between the last maximum + index and INDEX are set to the empty string. */ +void +attribute_set_value (struct attribute *attr, size_t index, const char *value) +{ + if (index < attr->n_values) + { + /* Replace existing value. */ + free (attr->values[index]); + attr->values[index] = xstrdup (value); + } + else + { + /* Add new value. */ + while (index > attr->n_values) + attribute_add_value (attr, ""); + attribute_add_value (attr, value); + } + +} + +/* Deletes the value with the given INDEX from ATTR. Any values + with higher-numbered indexes are shifted down into the gap + that this creates. + + If INDEX is greater than the maximum index, this has no effect. */ +void +attribute_del_value (struct attribute *attr, size_t index) +{ + if (index < attr->n_values) + { + free (attr->values[index]); + remove_element (attr->values, attr->n_values, sizeof *attr->values, + index); + attr->n_values--; + } +} + +/* Initializes SET as a new, initially empty attribute set. */ +void +attrset_init (struct attrset *set) +{ + hmap_init (&set->map); +} + +/* Initializes NEW_SET as a new attribute set whose contents are + initially the same as that of OLD_SET. 
*/ +void +attrset_clone (struct attrset *new_set, const struct attrset *old_set) +{ + struct attribute *old_attr; + + attrset_init (new_set); + HMAP_FOR_EACH (old_attr, struct attribute, node, &old_set->map) + { + struct attribute *new_attr = attribute_clone (old_attr); + hmap_insert (&new_set->map, &new_attr->node, + hmap_node_hash (&old_attr->node)); + } +} + +/* Frees the storage associated with SET, if SET is nonnull. + (Does not free SET itself.) */ +void +attrset_destroy (struct attrset *set) +{ + if (set != NULL) + { + struct attribute *attr, *next; + + HMAP_FOR_EACH_SAFE (attr, next, struct attribute, node, &set->map) + attribute_destroy (attr); + hmap_destroy (&set->map); + } +} + +/* Returns the number of attributes in SET. */ +size_t +attrset_count (const struct attrset *set) +{ + return hmap_count (&set->map); +} + +/* Returns the attribute in SET whose name matches NAME + case-insensitively, or a null pointer if SET does not contain + an attribute with that name. */ +struct attribute * +attrset_lookup (struct attrset *set, const char *name) +{ + struct attribute *attr; + HMAP_FOR_EACH_WITH_HASH (attr, struct attribute, node, + hsh_hash_case_string (name), &set->map) + if (!strcasecmp (attribute_get_name (attr), name)) + break; + return attr; +} + +/* Adds ATTR to SET, which must not already contain an attribute + with the same name (matched case insensitively). Ownership of + ATTR is transferred to SET. */ +void +attrset_add (struct attrset *set, struct attribute *attr) +{ + const char *name = attribute_get_name (attr); + assert (attrset_lookup (set, name) == NULL); + hmap_insert (&set->map, &attr->node, hsh_hash_case_string (name)); +} + +/* Deletes any attribute from SET that matches NAME + (case-insensitively). 
*/ +void +attrset_delete (struct attrset *set, const char *name) +{ + struct attribute *attr = attrset_lookup (set, name); + if (attr != NULL) + { + hmap_delete (&set->map, &attr->node); + attribute_destroy (attr); + } +} + +/* Deletes all attributes from SET. */ +void +attrset_clear (struct attrset *set) +{ + attrset_destroy (set); + attrset_init (set); +} + +static struct attribute *iterator_data (struct attrset_iterator *iterator) +{ + return HMAP_NULLABLE_DATA (iterator->node, struct attribute, node); +} + +/* Returns the first attribute in SET and initializes ITERATOR. + If SET is empty, returns a null pointer. + + The caller must not destroy the returned attribute, but it may + add or remove values. + + Attributes are visited in no particular order. Calling + attrset_add() during iteration can cause some attributes to + be visited more than once and others not at all. */ +struct attribute * +attrset_first (const struct attrset *set, struct attrset_iterator *iterator) +{ + iterator->node = hmap_first (&set->map); + return iterator_data (iterator); +} + +/* Returns the next attribute in SET and advances ITERATOR, which + should have been initialized by calling attrset_first(). If + all the attributes in SET have already been visited, returns a + null pointer. + + The caller must not destroy the returned attribute, but it may + add or remove values. + + Attributes are visited in no particular order. Calling + attrset_add() during iteration can cause some attributes to + be visited more than once and others not at all. */ +struct attribute * +attrset_next (const struct attrset *set, struct attrset_iterator *iterator) +{ + iterator->node = hmap_next (&set->map, iterator->node); + return iterator_data (iterator); +} diff --git a/src/data/attributes.h b/src/data/attributes.h new file mode 100644 index 00000000..87cb7726 --- /dev/null +++ b/src/data/attributes.h @@ -0,0 +1,70 @@ +/* PSPP - a program for statistical analysis. 
+ Copyright (C) 2008 Free Software Foundation, Inc. + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see . */ + +#ifndef DATA_ATTRIBUTES_H +#define DATA_ATTRIBUTES_H 1 + +#include + +/* This header supports custom attributes of the sort maintained + by the DATAFILE ATTRIBUTE and VARIABLE ATTRIBUTE commands. + + Attributes have a name (the rules for which are the same as + those for PSPP variable names) and one or more values, each of + which is a string. An attribute may be part of one attribute + set. + + An attribute set is an unordered collection of attributes + with names that are unique (case-insensitively). 
*/ + +struct attribute *attribute_create (const char *name); +struct attribute *attribute_clone (const struct attribute *); +void attribute_destroy (struct attribute *); + +const char *attribute_get_name (const struct attribute *); +const char *attribute_get_value (const struct attribute *, size_t index); +void attribute_add_value (struct attribute *, const char *); +void attribute_set_value (struct attribute *, size_t index, const char *); +void attribute_del_value (struct attribute *, size_t index); +size_t attribute_get_n_values (const struct attribute *); + +struct attrset + { + struct hmap map; + }; + +void attrset_init (struct attrset *); +void attrset_clone (struct attrset *, const struct attrset *); +void attrset_destroy (struct attrset *); + +size_t attrset_count (const struct attrset *); + +struct attribute *attrset_lookup (struct attrset *, const char *); +void attrset_add (struct attrset *, struct attribute *); +void attrset_delete (struct attrset *, const char *); +void attrset_clear (struct attrset *); + +struct attrset_iterator + { + struct hmap_node *node; + }; +struct attribute *attrset_first (const struct attrset *, + struct attrset_iterator *); +struct attribute *attrset_next (const struct attrset *, + struct attrset_iterator *); + + +#endif /* data/attributes.h */ diff --git a/src/data/automake.mk b/src/data/automake.mk index 5f25c1d2..bd161dba 100644 --- a/src/data/automake.mk +++ b/src/data/automake.mk @@ -10,6 +10,8 @@ src_data_libdata_la_SOURCES = \ src/data/any-reader.h \ src/data/any-writer.c \ src/data/any-writer.h \ + src/data/attributes.c \ + src/data/attributes.h \ src/data/calendar.c \ src/data/calendar.h \ src/data/case-map.c \ diff --git a/src/data/dictionary.c b/src/data/dictionary.c index 526bb280..caf8e2fc 100644 --- a/src/data/dictionary.c +++ b/src/data/dictionary.c @@ -21,14 +21,15 @@ #include #include -#include "case.h" -#include "category.h" -#include "identifier.h" -#include "settings.h" -#include "value-labels.h" -#include 
"vardict.h" -#include "variable.h" -#include "vector.h" +#include +#include +#include +#include +#include +#include +#include +#include +#include #include #include #include @@ -61,6 +62,7 @@ struct dictionary struct string documents; /* Documents, as a string. */ struct vector **vector; /* Vectors of variables. */ size_t vector_cnt; /* Number of vectors. */ + struct attrset attributes; /* Custom attributes. */ const struct dict_callbacks *callbacks; /* Callbacks on dictionary modification */ void *cb_data ; /* Data passed to callbacks */ @@ -115,6 +117,7 @@ dict_create (void) d->name_tab = hsh_create (8, compare_vars_by_name, hash_var_by_name, NULL, NULL); + attrset_init (&d->attributes); return d; } @@ -178,6 +181,8 @@ dict_clone (const struct dictionary *s) for (i = 0; i < s->vector_cnt; i++) d->vector[i] = vector_clone (s->vector[i], s, d); + dict_set_attributes (d, dict_get_attributes (s)); + return d; } @@ -208,6 +213,7 @@ dict_clear (struct dictionary *d) d->label = NULL; ds_destroy (&d->documents); dict_clear_vectors (d); + attrset_clear (&d->attributes); } /* Destroys the aux data for every variable in D, by calling @@ -235,6 +241,7 @@ dict_destroy (struct dictionary *d) dict_clear (d); hsh_destroy (d->name_tab); + attrset_destroy (&d->attributes); free (d); } } @@ -1285,6 +1292,32 @@ dict_clear_vectors (struct dictionary *d) d->vector_cnt = 0; } +/* Returns D's attribute set. The caller may examine or modify + the attribute set, but must not destroy it. Destroying D or + calling dict_set_attributes for D will also destroy D's + attribute set. */ +struct attrset * +dict_get_attributes (const struct dictionary *d) +{ + return (struct attrset *) &d->attributes; +} + +/* Replaces D's attributes set by a copy of ATTRS. 
*/ +void +dict_set_attributes (struct dictionary *d, const struct attrset *attrs) +{ + attrset_destroy (&d->attributes); + attrset_clone (&d->attributes, attrs); +} + +/* Returns true if D has at least one attribute in its attribute + set, false if D's attribute set is empty. */ +bool +dict_has_attributes (const struct dictionary *d) +{ + return attrset_count (&d->attributes) > 0; +} + /* Called from variable.c to notify the dictionary that some property of the variable has changed */ void diff --git a/src/data/dictionary.h b/src/data/dictionary.h index 0ab259d3..bb14f52e 100644 --- a/src/data/dictionary.h +++ b/src/data/dictionary.h @@ -142,6 +142,11 @@ const struct vector *dict_lookup_vector (const struct dictionary *, const char *name); void dict_clear_vectors (struct dictionary *); +/* Attributes. */ +struct attrset *dict_get_attributes (const struct dictionary *); +void dict_set_attributes (struct dictionary *, const struct attrset *); +bool dict_has_attributes (const struct dictionary *); + /* Functions to be called upon dictionary changes. */ struct dict_callbacks { diff --git a/src/data/sys-file-reader.c b/src/data/sys-file-reader.c index d5b5b15f..c80bd5f5 100644 --- a/src/data/sys-file-reader.c +++ b/src/data/sys-file-reader.c @@ -34,6 +34,7 @@ #include #include +#include #include #include #include @@ -98,9 +99,11 @@ static struct variable *lookup_var_by_value_idx (struct sfm_reader *, struct variable **, int value_idx); +static void sys_msg (struct sfm_reader *r, int class, + const char *format, va_list args) + PRINTF_FORMAT (3, 0); static void sys_warn (struct sfm_reader *, const char *, ...) PRINTF_FORMAT (2, 3); - static void sys_error (struct sfm_reader *, const char *, ...) 
PRINTF_FORMAT (2, 3) NO_RETURN; @@ -112,15 +115,23 @@ static double read_float (struct sfm_reader *); static void read_string (struct sfm_reader *, char *, size_t); static void skip_bytes (struct sfm_reader *, size_t); -static struct variable_to_value_map *open_variable_to_value_map ( - struct sfm_reader *, size_t size); -static void close_variable_to_value_map (struct sfm_reader *r, - struct variable_to_value_map *); -static bool read_variable_to_value_map (struct sfm_reader *, - struct dictionary *, - struct variable_to_value_map *, - struct variable **var, char **value, - int *warning_cnt); +static struct text_record *open_text_record (struct sfm_reader *, size_t size); +static void close_text_record (struct sfm_reader *r, + struct text_record *); +static bool read_variable_to_value_pair (struct sfm_reader *, + struct dictionary *, + struct text_record *, + struct variable **var, char **value); +static void text_warn (struct sfm_reader *r, struct text_record *text, + const char *format, ...) + PRINTF_FORMAT (3, 4); +static char *text_get_token (struct text_record *, + struct substring delimiters); +static bool text_match (struct text_record *, char c); +static bool text_read_short_name (struct sfm_reader *, struct dictionary *, + struct text_record *, + struct substring delimiters, + struct variable **); static bool close_reader (struct sfm_reader *r); @@ -163,7 +174,12 @@ static void read_long_var_name_map (struct sfm_reader *, static void read_long_string_map (struct sfm_reader *, size_t size, size_t count, struct dictionary *); - +static void read_data_file_attributes (struct sfm_reader *, + size_t size, size_t count, + struct dictionary *); +static void read_variable_attributes (struct sfm_reader *, + size_t size, size_t count, + struct dictionary *); /* Opens the system file designated by file handle FH for reading. Reads the system file's dictionary into *DICT. 
@@ -748,9 +764,12 @@ read_extension_record (struct sfm_reader *r, struct dictionary *dict, break; case 17: - /* Text field that defines variable attributes. New in - SPSS 14. */ - break; + read_data_file_attributes (r, size, count, dict); + return; + + case 18: + read_variable_attributes (r, size, count, dict); + return; case 20: /* New in SPSS 16. Contains a single string that describes @@ -927,14 +946,12 @@ static void read_long_var_name_map (struct sfm_reader *r, size_t size, size_t count, struct dictionary *dict) { - struct variable_to_value_map *map; + struct text_record *text; struct variable *var; char *long_name; - int warning_cnt = 0; - map = open_variable_to_value_map (r, size * count); - while (read_variable_to_value_map (r, dict, map, &var, &long_name, - &warning_cnt)) + text = open_text_record (r, size * count); + while (read_variable_to_value_pair (r, dict, text, &var, &long_name)) { char **short_names; size_t short_name_cnt; @@ -980,7 +997,7 @@ read_long_var_name_map (struct sfm_reader *r, size_t size, size_t count, } free (short_names); } - close_variable_to_value_map (r, map); + close_text_record (r, text); r->has_long_var_names = true; } @@ -990,14 +1007,12 @@ static void read_long_string_map (struct sfm_reader *r, size_t size, size_t count, struct dictionary *dict) { - struct variable_to_value_map *map; + struct text_record *text; struct variable *var; char *length_s; - int warning_cnt = 0; - map = open_variable_to_value_map (r, size * count); - while (read_variable_to_value_map (r, dict, map, &var, &length_s, - &warning_cnt)) + text = open_text_record (r, size * count); + while (read_variable_to_value_pair (r, dict, text, &var, &length_s)) { size_t idx = var_get_dict_index (var); long int length; @@ -1045,7 +1060,7 @@ read_long_string_map (struct sfm_reader *r, size_t size, size_t count, dict_delete_consecutive_vars (dict, idx + 1, segment_cnt - 1); var_set_width (var, length); } - close_variable_to_value_map (r, map); + close_text_record (r, 
text); dict_compact_values (dict); } @@ -1183,6 +1198,96 @@ read_value_labels (struct sfm_reader *r, pool_destroy (subpool); } + +/* Reads a set of custom attributes from TEXT into ATTRS. + ATTRS may be a null pointer, in which case the attributes are + read but discarded. */ +static void +read_attributes (struct sfm_reader *r, struct text_record *text, + struct attrset *attrs) +{ + do + { + struct attribute *attr; + char *key; + int index; + + /* Parse the key. */ + key = text_get_token (text, ss_cstr ("(")); + if (key == NULL) + return; + + attr = attribute_create (key); + for (index = 1; ; index++) + { + /* Parse the value. */ + char *value; + size_t length; + + value = text_get_token (text, ss_cstr ("\n")); + if (value == NULL) + { + text_warn (r, text, _("Error parsing attribute value %s[%d]"), + key, index); + break; + } + + length = strlen (value); + if (length >= 2 && value[0] == '\'' && value[length - 1] == '\'') + { + value[length - 1] = '\0'; + attribute_add_value (attr, value + 1); + } + else + { + text_warn (r, text, + _("Attribute value %s[%d] is not quoted: %s"), + key, index, value); + attribute_add_value (attr, value); + } + + /* Was this the last value for this attribute? */ + if (text_match (text, ')')) + break; + } + if (attrs != NULL) + attrset_add (attrs, attr); + else + attribute_destroy (attr); + } + while (!text_match (text, '/')); +} + +/* Reads record type 7, subtype 17, which lists custom + attributes on the data file. */ +static void +read_data_file_attributes (struct sfm_reader *r, + size_t size, size_t count, + struct dictionary *dict) +{ + struct text_record *text = open_text_record (r, size * count); + read_attributes (r, text, dict_get_attributes (dict)); + close_text_record (r, text); +} + +/* Reads record type 7, subtype 18, which lists custom + attributes on individual variables. 
*/ +static void +read_variable_attributes (struct sfm_reader *r, + size_t size, size_t count, + struct dictionary *dict) +{ + struct text_record *text = open_text_record (r, size * count); + for (;;) + { + struct variable *var; + if (!text_read_short_name (r, dict, text, ss_cstr (":"), &var)) + break; + read_attributes (r, text, var != NULL ? var_get_attributes (var) : NULL); + } + close_text_record (r, text); +} + /* Case reader. */ @@ -1518,82 +1623,124 @@ lookup_var_by_short_name (struct dictionary *d, const char *short_name) return NULL; } -/* Helpers for reading records that contain "variable=value" - pairs. */ +/* Helpers for reading records that contain structured text + strings. */ + +/* Maximum number of warnings to issue for a single text + record. */ +#define MAX_TEXT_WARNINGS 5 /* State. */ -struct variable_to_value_map +struct text_record { struct substring buffer; /* Record contents. */ size_t pos; /* Current position in buffer. */ + int n_warnings; /* Number of warnings issued or suppressed. */ }; -/* Reads SIZE bytes into a "variable=value" map for R, - and returns the map. */ -static struct variable_to_value_map * -open_variable_to_value_map (struct sfm_reader *r, size_t size) +/* Reads SIZE bytes into a text record for R, + and returns the new text record. */ +static struct text_record * +open_text_record (struct sfm_reader *r, size_t size) { - struct variable_to_value_map *map = pool_alloc (r->pool, sizeof *map); + struct text_record *text = pool_alloc (r->pool, sizeof *text); char *buffer = pool_malloc (r->pool, size + 1); read_bytes (r, buffer, size); - map->buffer = ss_buffer (buffer, size); - map->pos = 0; - return map; + text->buffer = ss_buffer (buffer, size); + text->pos = 0; + text->n_warnings = 0; + return text; } -/* Closes MAP and frees its storage. - Not really needed, because the pool will free the map anyway, - but can be used to free it earlier. 
*/ +/* Closes TEXT, frees its storage, and issues a final warning + about suppressed warnings if necessary. */ static void -close_variable_to_value_map (struct sfm_reader *r, - struct variable_to_value_map *map) +close_text_record (struct sfm_reader *r, struct text_record *text) { - pool_free (r->pool, ss_data (map->buffer)); + if (text->n_warnings > MAX_TEXT_WARNINGS) + sys_warn (r, _("Suppressed %d additional related warnings."), + text->n_warnings - MAX_TEXT_WARNINGS); + pool_free (r->pool, ss_data (text->buffer)); } -/* Reads the next variable=value pair from MAP. +/* Reads a variable=value pair from TEXT. Looks up the variable in DICT and stores it into *VAR. Stores a null-terminated value into *VALUE. */ static bool -read_variable_to_value_map (struct sfm_reader *r, struct dictionary *dict, - struct variable_to_value_map *map, - struct variable **var, char **value, - int *warning_cnt) +read_variable_to_value_pair (struct sfm_reader *r, struct dictionary *dict, + struct text_record *text, + struct variable **var, char **value) { - int max_warnings = 5; - for (;;) { - struct substring short_name_ss, value_ss; + if (!text_read_short_name (r, dict, text, ss_cstr ("="), var)) + return false; + + *value = text_get_token (text, ss_buffer ("\t\0", 2)); + if (*value == NULL) + return false; - if (!ss_tokenize (map->buffer, ss_cstr ("="), &map->pos, &short_name_ss) - || !ss_tokenize (map->buffer, ss_buffer ("\t\0", 2), &map->pos, - &value_ss)) - { - if (*warning_cnt > max_warnings) - sys_warn (r, _("Suppressed %d additional variable map warnings."), - *warning_cnt - max_warnings); - return false; - } + text->pos += ss_span (ss_substr (text->buffer, text->pos, SIZE_MAX), + ss_buffer ("\t\0", 2)); - map->pos += ss_span (ss_substr (map->buffer, map->pos, SIZE_MAX), - ss_buffer ("\t\0", 2)); + if (*var != NULL) + return true; + } +} - ss_data (short_name_ss)[ss_length (short_name_ss)] = '\0'; - *var = lookup_var_by_short_name (dict, ss_data (short_name_ss)); - if (*var ==
NULL) - { - if (++*warning_cnt <= max_warnings) - sys_warn (r, _("Variable map refers to unknown variable %s."), - ss_data (short_name_ss)); - continue; - } +static bool +text_read_short_name (struct sfm_reader *r, struct dictionary *dict, + struct text_record *text, struct substring delimiters, + struct variable **var) +{ + char *short_name = text_get_token (text, delimiters); + if (short_name == NULL) + return false; - ss_data (value_ss)[ss_length (value_ss)] = '\0'; - *value = ss_data (value_ss); + *var = lookup_var_by_short_name (dict, short_name); + if (*var == NULL) + text_warn (r, text, _("Variable map refers to unknown variable %s."), + short_name); + return true; +} + +/* Displays a warning for the current file position, limiting the + number to MAX_TEXT_WARNINGS for TEXT. */ +static void +text_warn (struct sfm_reader *r, struct text_record *text, + const char *format, ...) +{ + if (text->n_warnings++ < MAX_TEXT_WARNINGS) + { + va_list args; + va_start (args, format); + sys_msg (r, MW, format, args); + va_end (args); + } +} + +static char * +text_get_token (struct text_record *text, struct substring delimiters) +{ + struct substring token; + + if (!ss_tokenize (text->buffer, delimiters, &text->pos, &token)) + return NULL; + ss_data (token)[ss_length (token)] = '\0'; + return ss_data (token); +} + +static bool +text_match (struct text_record *text, char c) +{ + if (text->pos < ss_length (text->buffer) && text->buffer.string[text->pos] == c) + { + text->pos++; return true; } + else + return false; } /* Messages.
*/ diff --git a/src/data/sys-file-writer.c b/src/data/sys-file-writer.c index 7804e591..aa539a51 100644 --- a/src/data/sys-file-writer.c +++ b/src/data/sys-file-writer.c @@ -33,6 +33,7 @@ #include #include +#include #include #include #include @@ -111,6 +112,11 @@ static void write_variable_display_parameters (struct sfm_writer *w, static void write_documents (struct sfm_writer *, const struct dictionary *); +static void write_data_file_attributes (struct sfm_writer *, + const struct dictionary *); +static void write_variable_attributes (struct sfm_writer *, + const struct dictionary *); + static void write_int (struct sfm_writer *, int32_t); static inline void convert_double_to_output_format (double, uint8_t[8]); static void write_float (struct sfm_writer *, double); @@ -235,6 +241,10 @@ sfm_open_writer (struct file_handle *fh, struct dictionary *d, write_vls_length_table (w, d); + if (attrset_count (dict_get_attributes (d))) + write_data_file_attributes (w, d); + write_variable_attributes (w, d); + /* Write end-of-headers record. 
*/ write_int (w, 999); write_int (w, 0); @@ -520,6 +530,72 @@ write_documents (struct sfm_writer *w, const struct dictionary *d) write_bytes (w, dict_get_documents (d), line_cnt * DOC_LINE_LENGTH); } +static void +put_attrset (struct string *string, const struct attrset *attrs) +{ + const struct attribute *attr; + struct attrset_iterator i; + + for (attr = attrset_first (attrs, &i); attr != NULL; + attr = attrset_next (attrs, &i)) + { + size_t n_values = attribute_get_n_values (attr); + size_t j; + + ds_put_cstr (string, attribute_get_name (attr)); + ds_put_char (string, '('); + for (j = 0; j < n_values; j++) + ds_put_format (string, "'%s'\n", attribute_get_value (attr, j)); + ds_put_char (string, ')'); + } +} + +static void +write_attribute_record (struct sfm_writer *w, const struct string *content, + int subtype) +{ + write_int (w, 7); + write_int (w, subtype); + write_int (w, 1); + write_int (w, ds_length (content)); + write_bytes (w, ds_data (content), ds_length (content)); +} + +static void +write_data_file_attributes (struct sfm_writer *w, + const struct dictionary *d) +{ + struct string s = DS_EMPTY_INITIALIZER; + put_attrset (&s, dict_get_attributes (d)); + write_attribute_record (w, &s, 17); + ds_destroy (&s); +} + +static void +write_variable_attributes (struct sfm_writer *w, const struct dictionary *d) +{ + struct string s = DS_EMPTY_INITIALIZER; + size_t n_vars = dict_get_var_cnt (d); + size_t n_attrsets = 0; + size_t i; + + for (i = 0; i < n_vars; i++) + { + struct variable *v = dict_get_var (d, i); + struct attrset *attrs = var_get_attributes (v); + if (attrset_count (attrs)) + { + if (n_attrsets++) + ds_put_char (&s, '/'); + ds_put_format (&s, "%s:", var_get_short_name (v, 0)); + put_attrset (&s, attrs); + } + } + if (n_attrsets) + write_attribute_record (w, &s, 18); + ds_destroy (&s); +} + /* Write the alignment, width and scale values. 
*/ static void write_variable_display_parameters (struct sfm_writer *w, diff --git a/src/data/variable.c b/src/data/variable.c index e39692a0..d14237d9 100644 --- a/src/data/variable.c +++ b/src/data/variable.c @@ -19,14 +19,15 @@ #include -#include "category.h" -#include "data-out.h" -#include "format.h" -#include "dictionary.h" -#include "identifier.h" -#include "missing-values.h" -#include "value-labels.h" -#include "vardict.h" +#include +#include +#include +#include +#include +#include +#include +#include +#include #include #include @@ -76,6 +77,9 @@ struct variable vectors with binary entries, so any variable of type ALPHA will have its values stored here. */ struct cat_vals *obs_vals; + + /* Custom attributes. */ + struct attrset attributes; }; /* Creates and returns a new variable with the given NAME and @@ -108,6 +112,7 @@ var_create (const char *name, int width) v->aux = NULL; v->aux_dtor = NULL; v->obs_vals = NULL; + attrset_init (&v->attributes); return v; } @@ -138,6 +143,7 @@ var_clone (const struct variable *old_var) var_set_display_width (new_var, var_get_display_width (old_var)); var_set_alignment (new_var, var_get_alignment (old_var)); var_set_leave (new_var, var_get_leave (old_var)); + var_set_attributes (new_var, var_get_attributes (old_var)); return new_var; } @@ -328,6 +334,20 @@ compare_var_ptrs_by_name (const void *a_, const void *b_, return strcasecmp (var_get_name (*a), var_get_name (*b)); } +/* A hsh_compare_func that orders pointers to variables A and B + by their dictionary indexes. */ +int +compare_var_ptrs_by_dict_index (const void *a_, const void *b_, + const void *aux UNUSED) +{ + struct variable *const *a = a_; + struct variable *const *b = b_; + size_t a_index = var_get_dict_index (*a); + size_t b_index = var_get_dict_index (*b); + + return a_index < b_index ? -1 : a_index > b_index; +} + /* A hsh_hash_func that hashes pointer to variable V based on its name. 
*/ unsigned @@ -599,7 +619,6 @@ var_append_value_name (const struct variable *v, const union value *value, ds_put_cstr (str, name); } - /* Print and write formats. */ /* Returns V's print format specification. */ @@ -1021,6 +1040,31 @@ var_has_obs_vals (const struct variable *v) return v->obs_vals != NULL; } +/* Returns variable V's attribute set. The caller may examine or + modify the attribute set, but must not destroy it. Destroying + V, or calling var_set_attributes() on V, will also destroy its + attribute set. */ +struct attrset * +var_get_attributes (const struct variable *v) +{ + return (struct attrset *) &v->attributes; +} + +/* Replaces variable V's attributes set by a copy of ATTRS. */ +void +var_set_attributes (struct variable *v, const struct attrset *attrs) +{ + attrset_destroy (&v->attributes); + attrset_clone (&v->attributes, attrs); +} + +/* Returns true if V has any custom attributes, false if it has none. */ +bool +var_has_attributes (const struct variable *v) +{ + return attrset_count (&v->attributes) > 0; +} + /* Returns V's vardict structure. */ const struct vardict_info * var_get_vardict (const struct variable *v) diff --git a/src/data/variable.h b/src/data/variable.h index d2820d34..ecfa6b76 100644 --- a/src/data/variable.h +++ b/src/data/variable.h @@ -50,6 +50,8 @@ unsigned hash_var_by_name (const void *, const void *); int compare_var_ptrs_by_name (const void *, const void *, const void *); unsigned hash_var_ptr_by_name (const void *, const void *); +int compare_var_ptrs_by_dict_index (const void *, const void *, const void *); + /* Types and widths of values associated with a variable. */ enum val_type var_get_type (const struct variable *); int var_get_width (const struct variable *); @@ -175,6 +177,11 @@ struct cat_vals *var_get_obs_vals (const struct variable *); void var_set_obs_vals (const struct variable *, struct cat_vals *); bool var_has_obs_vals (const struct variable *); +/* Custom attributes. 
*/ +struct attrset *var_get_attributes (const struct variable *); +void var_set_attributes (struct variable *, const struct attrset *); +bool var_has_attributes (const struct variable *); + /* Function types. */ typedef bool var_predicate_func (const struct variable *); diff --git a/src/language/command.def b/src/language/command.def index c2cc7f62..442234d9 100644 --- a/src/language/command.def +++ b/src/language/command.def @@ -53,6 +53,7 @@ DEF_CMD (S_DATA | S_INPUT_PROGRAM, 0, "ADD DOCUMENT", cmd_add_documents) DEF_CMD (S_DATA | S_INPUT_PROGRAM, 0, "APPLY DICTIONARY", cmd_apply_dictionary) DEF_CMD (S_DATA | S_INPUT_PROGRAM, 0, "BREAK", cmd_break) DEF_CMD (S_DATA | S_INPUT_PROGRAM, 0, "COMPUTE", cmd_compute) +DEF_CMD (S_DATA | S_INPUT_PROGRAM, 0, "DATAFILE ATTRIBUTE", cmd_datafile_attribute) DEF_CMD (S_DATA | S_INPUT_PROGRAM, 0, "DISPLAY", cmd_display) DEF_CMD (S_DATA | S_INPUT_PROGRAM, F_KEEP_FINAL_TOKEN, "DOCUMENT", cmd_document) DEF_CMD (S_DATA | S_INPUT_PROGRAM, 0, "DO IF", cmd_do_if) @@ -79,6 +80,7 @@ DEF_CMD (S_DATA | S_INPUT_PROGRAM, 0, "SPLIT FILE", cmd_split_file) DEF_CMD (S_DATA | S_INPUT_PROGRAM, 0, "STRING", cmd_string) DEF_CMD (S_DATA | S_INPUT_PROGRAM, 0, "VALUE LABELS", cmd_value_labels) DEF_CMD (S_DATA | S_INPUT_PROGRAM, 0, "VARIABLE ALIGNMENT", cmd_variable_alignment) +DEF_CMD (S_DATA | S_INPUT_PROGRAM, 0, "VARIABLE ATTRIBUTE", cmd_variable_attribute) DEF_CMD (S_DATA | S_INPUT_PROGRAM, 0, "VARIABLE LABELS", cmd_variable_labels) DEF_CMD (S_DATA | S_INPUT_PROGRAM, 0, "VARIABLE LEVEL", cmd_variable_level) DEF_CMD (S_DATA | S_INPUT_PROGRAM, 0, "VARIABLE WIDTH", cmd_variable_width) @@ -165,7 +167,6 @@ UNIMPL_CMD ("CSSELECT", "Select complex samples") UNIMPL_CMD ("CSTABULATE", "Tabulate complex samples") UNIMPL_CMD ("CTABLES", "Display complex samples") UNIMPL_CMD ("CURVEFIT", "Fit curve to line plot") -UNIMPL_CMD ("DATAFILE ATTRIBUTE", "User defined datafile attributes") UNIMPL_CMD ("DATASET", "Alternate data set") UNIMPL_CMD ("DATE", "Create time 
series data") UNIMPL_CMD ("DEFINE", "Syntax macros") diff --git a/src/language/dictionary/apply-dictionary.c b/src/language/dictionary/apply-dictionary.c index ff4d7853..a1b33101 100644 --- a/src/language/dictionary/apply-dictionary.c +++ b/src/language/dictionary/apply-dictionary.c @@ -120,12 +120,19 @@ cmd_apply_dictionary (struct lexer *lexer, struct dataset *ds) var_set_print_format (t, var_get_print_format (s)); var_set_write_format (t, var_get_write_format (s)); } + + if (var_has_attributes (s)) + var_set_attributes (t, var_get_attributes (s)); } if (!n_matched) msg (SW, _("No matching variables found between the source " "and target files.")); + /* Data file attributes. */ + if (dict_has_attributes (dict)) + dict_set_attributes (dataset_dict (ds), dict_get_attributes (dict)); + /* Weighting. */ if (dict_get_weight (dict) != NULL) { diff --git a/src/language/dictionary/attributes.c b/src/language/dictionary/attributes.c new file mode 100644 index 00000000..9e9b8085 --- /dev/null +++ b/src/language/dictionary/attributes.c @@ -0,0 +1,200 @@ +/* PSPP - a program for statistical analysis. + Copyright (C) 2008 Free Software Foundation, Inc. + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see . 
*/ + +#include + +#include + +#include +#include +#include +#include +#include +#include +#include +#include + +#include "xalloc.h" + +#include "gettext.h" +#define _(msgid) gettext (msgid) + +static enum cmd_result parse_attributes (struct lexer *, struct attrset **, + size_t n); + +/* Parses the DATAFILE ATTRIBUTE command. */ +int +cmd_datafile_attribute (struct lexer *lexer, struct dataset *ds) +{ + struct attrset *set = dict_get_attributes (dataset_dict (ds)); + return parse_attributes (lexer, &set, 1); +} + +/* Parses the VARIABLE ATTRIBUTE command. */ +int +cmd_variable_attribute (struct lexer *lexer, struct dataset *ds) +{ + do + { + struct variable **vars; + struct attrset **sets; + size_t n_vars, i; + bool ok; + + if (!lex_force_match_id (lexer, "VARIABLES") + || !lex_force_match (lexer, '=') + || !parse_variables (lexer, dataset_dict (ds), &vars, &n_vars, + PV_NONE)) + return CMD_FAILURE; + + sets = xmalloc (n_vars * sizeof *sets); + for (i = 0; i < n_vars; i++) + sets[i] = var_get_attributes (vars[i]); + + ok = parse_attributes (lexer, sets, n_vars) == CMD_SUCCESS; + free (vars); + free (sets); + if (!ok) + return CMD_FAILURE; + } + while (lex_match (lexer, '/')); + + return lex_end_of_command (lexer); +} + +static bool +match_subcommand (struct lexer *lexer, const char *keyword) +{ + if (lex_token (lexer) == T_ID + && lex_id_match (ss_cstr (lex_tokid (lexer)), ss_cstr (keyword)) + && lex_look_ahead (lexer) == '=') + { + lex_get (lexer); /* Skip keyword. */ + lex_get (lexer); /* Skip '='.
*/ + return true; + } + else + return false; +} + +static bool +parse_attribute_name (struct lexer *lexer, char name[VAR_NAME_LEN + 1], + size_t *index) +{ + if (!lex_force_id (lexer)) + return false; + strcpy (name, lex_tokid (lexer)); + lex_get (lexer); + + if (lex_match (lexer, '[')) + { + if (!lex_force_int (lexer)) + return false; + if (lex_integer (lexer) < 1 || lex_integer (lexer) > 65535) + { + msg (SE, _("Attribute array index must be between 1 and 65535.")); + return false; + } + *index = lex_integer (lexer); + lex_get (lexer); + if (!lex_force_match (lexer, ']')) + return false; + } + else + *index = 0; + return true; +} + +static bool +add_attribute (struct lexer *lexer, struct attrset **sets, size_t n) +{ + char name[VAR_NAME_LEN + 1]; + size_t index, i; + char *value; + + if (!parse_attribute_name (lexer, name, &index) + || !lex_force_match (lexer, '(') + || !lex_force_string (lexer)) + return false; + value = ds_cstr (lex_tokstr (lexer)); + + for (i = 0; i < n; i++) + { + struct attribute *attr = attrset_lookup (sets[i], name); + if (attr == NULL) + { + attr = attribute_create (name); + attrset_add (sets[i], attr); + } + attribute_set_value (attr, index ? 
index - 1 : 0, value); + } + + lex_get (lexer); + return lex_force_match (lexer, ')'); +} + +static bool +delete_attribute (struct lexer *lexer, struct attrset **sets, size_t n) +{ + char name[VAR_NAME_LEN + 1]; + size_t index, i; + + if (!parse_attribute_name (lexer, name, &index)) + return false; + + for (i = 0; i < n; i++) + { + struct attrset *set = sets[i]; + if (index == 0) + attrset_delete (set, name); + else + { + struct attribute *attr = attrset_lookup (set, name); + if (attr != NULL) + { + attribute_del_value (attr, index - 1); + if (attribute_get_n_values (attr) == 0) + attrset_delete (set, name); + } + } + } + return true; +} + +static enum cmd_result +parse_attributes (struct lexer *lexer, struct attrset **sets, size_t n) +{ + enum { UNKNOWN, ADD, DELETE } command = UNKNOWN; + do + { + if (match_subcommand (lexer, "ATTRIBUTE")) + command = ADD; + else if (match_subcommand (lexer, "DELETE")) + command = DELETE; + else if (command == UNKNOWN) + { + lex_error (lexer, _("expecting ATTRIBUTE= or DELETE=")); + return CMD_FAILURE; + } + + if (!(command == ADD + ? 
add_attribute (lexer, sets, n) + : delete_attribute (lexer, sets, n))) + return CMD_FAILURE; + } + while (lex_token (lexer) != '/' && lex_token (lexer) != '.'); + return CMD_SUCCESS; +} diff --git a/src/language/dictionary/automake.mk b/src/language/dictionary/automake.mk index 825832a2..2aa91842 100644 --- a/src/language/dictionary/automake.mk +++ b/src/language/dictionary/automake.mk @@ -1,6 +1,7 @@ ## Process this file with automake to produce Makefile.in -*- makefile -*- language_dictionary_sources = \ + src/language/dictionary/attributes.c \ src/language/dictionary/apply-dictionary.c \ src/language/dictionary/delete-variables.c \ src/language/dictionary/formats.c \ diff --git a/src/language/dictionary/sys-file-info.c b/src/language/dictionary/sys-file-info.c index b98854db..d6279159 100644 --- a/src/language/dictionary/sys-file-info.c +++ b/src/language/dictionary/sys-file-info.c @@ -19,6 +19,7 @@ #include #include +#include #include #include #include @@ -48,19 +49,22 @@ #include "gettext.h" #define _(msgid) gettext (msgid) -/* Constants for DISPLAY utility. */ -enum +/* Information to include in displaying a dictionary. */ +enum { - AS_NAMES = 0, - AS_INDEX, - AS_VARIABLES, - AS_LABELS, - AS_DICTIONARY, - AS_SCRATCH, - AS_VECTOR + DF_DICT_INDEX = 1 << 0, + DF_FORMATS = 1 << 1, + DF_VALUE_LABELS = 1 << 2, + DF_VARIABLE_LABELS = 1 << 3, + DF_MISSING_VALUES = 1 << 4, + DF_AT_ATTRIBUTES = 1 << 5, /* Attributes whose names begin with @. */ + DF_ATTRIBUTES = 1 << 6, /* All other attributes. */ + DF_MISC = 1 << 7, + DF_ALL = (1 << 8) - 1 }; -static int describe_variable (const struct variable *v, struct tab_table *t, int r, int as); +static int describe_variable (const struct variable *v, struct tab_table *t, + int r, int pc, int flags); /* Sets the widths of all the columns and heights of all the rows in table T for driver D. 
*/ @@ -87,8 +91,7 @@ cmd_sysfile_info (struct lexer *lexer, struct dataset *ds UNUSED) struct tab_table *t; struct casereader *reader; struct sfm_read_info info; - int r, nr; - int i; + int r, i; lex_match_id (lexer, "FILE"); lex_match (lexer, '='); @@ -153,9 +156,7 @@ cmd_sysfile_info (struct lexer *lexer, struct dataset *ds UNUSED) tab_dim (t, tab_natural_dimensions); tab_submit (t); - nr = 1 + 2 * dict_get_var_cnt (d); - - t = tab_create (4, nr, 1); + t = tab_create (4, 1 + 2 * dict_get_var_cnt (d), 1); tab_dim (t, sysfile_info_dim); tab_headers (t, 0, 0, 1, 0); tab_text (t, 0, 0, TAB_LEFT | TAT_TITLE, _("Variable")); @@ -163,19 +164,8 @@ cmd_sysfile_info (struct lexer *lexer, struct dataset *ds UNUSED) tab_text (t, 3, 0, TAB_LEFT | TAT_TITLE, _("Position")); tab_hline (t, TAL_2, 0, 3, 1); for (r = 1, i = 0; i < dict_get_var_cnt (d); i++) - { - struct variable *v = dict_get_var (d, i); - const int nvl = val_labs_count (var_get_value_labels (v)); - - if (r + 13 + nvl > nr) - { - nr = MAX (nr * dict_get_var_cnt (d) / (i + 1), nr); - nr += 10 + nvl; - tab_realloc (t, 4, nr); - } - - r = describe_variable (v, t, r, AS_DICTIONARY); - } + r = describe_variable (dict_get_var (d, i), t, r, 3, + DF_ALL & ~DF_AT_ATTRIBUTES); tab_box (t, TAL_1, TAL_1, -1, -1, 0, 0, 3, r); tab_vline (t, TAL_1, 1, 0, r); @@ -197,6 +187,7 @@ static void display_macros (void); static void display_documents (const struct dictionary *dict); static void display_variables (const struct variable **, size_t, int); static void display_vectors (const struct dictionary *dict, int sorted); +static void display_data_file_attributes (struct attrset *, int flags); int cmd_display (struct lexer *lexer, struct dataset *ds) @@ -228,73 +219,81 @@ cmd_display (struct lexer *lexer, struct dataset *ds) } else { - static const char *sbc[] = - {"NAMES", "INDEX", "VARIABLES", "LABELS", - "DICTIONARY", "SCRATCH", "VECTORS", NULL}; - const char **cp; - int as; + int flags; sorted = lex_match_id (lexer, "SORTED"); - 
for (cp = sbc; *cp; cp++) - if (lex_token (lexer) == T_ID - && lex_id_match (ss_cstr (*cp), ss_cstr (lex_tokid (lexer)))) - { - lex_get (lexer); - break; - } - as = cp - sbc; - - if (*cp == NULL) - as = AS_NAMES; - - if (as == AS_VECTOR) + if (lex_match_id (lexer, "VECTORS")) { display_vectors (dataset_dict(ds), sorted); - return CMD_SUCCESS; + return lex_end_of_command (lexer); } + else if (lex_match_id (lexer, "SCRATCH")) + { + dict_get_vars (dataset_dict (ds), &vl, &n, DC_ORDINARY); + flags = 0; + } + else + { + struct subcommand + { + const char *name; + int flags; + }; + static const struct subcommand subcommands[] = + { + {"@ATTRIBUTES", DF_ATTRIBUTES | DF_AT_ATTRIBUTES}, + {"ATTRIBUTES", DF_ATTRIBUTES}, + {"DICTIONARY", DF_ALL & ~DF_AT_ATTRIBUTES}, + {"INDEX", DF_DICT_INDEX}, + {"LABELS", DF_DICT_INDEX | DF_VARIABLE_LABELS}, + {"NAMES", 0}, + {"VARIABLES", + DF_DICT_INDEX | DF_FORMATS | DF_MISSING_VALUES | DF_MISC}, + {NULL, 0}, + }; + const struct subcommand *sbc; + + flags = 0; + for (sbc = subcommands; sbc->name != NULL; sbc++) + if (lex_match_id (lexer, sbc->name)) + { + flags = sbc->flags; + break; + } + + lex_match (lexer, '/'); + lex_match_id (lexer, "VARIABLES"); + lex_match (lexer, '='); + + if (lex_token (lexer) != '.') + { + if (!parse_variables_const (lexer, dataset_dict (ds), &vl, &n, + PV_NONE)) + { + free (vl); + return CMD_FAILURE; + } + } + else + dict_get_vars (dataset_dict (ds), &vl, &n, 0); + } - lex_match (lexer, '/'); - lex_match_id (lexer, "VARIABLES"); - lex_match (lexer, '='); - - if (lex_token (lexer) != '.') - { - if (!parse_variables_const (lexer, dataset_dict (ds), &vl, &n, PV_NONE)) - { - free (vl); - return CMD_FAILURE; - } - as = AS_DICTIONARY; - } + if (n > 0) + { + sort (vl, n, sizeof *vl, + (sorted + ? 
compare_var_ptrs_by_name + : compare_var_ptrs_by_dict_index), NULL); + display_variables (vl, n, flags); + } else - dict_get_vars (dataset_dict (ds), &vl, &n, 0); - - if (as == AS_SCRATCH) - { - size_t i, m; - for (i = 0, m = n; i < n; i++) - if (dict_class_from_id (var_get_name (vl[i])) != DC_SCRATCH) - { - vl[i] = NULL; - m--; - } - as = AS_NAMES; - n = m; - } - - if (n == 0) - { - msg (SW, _("No variables to display.")); - return CMD_FAILURE; - } - - if (sorted) - sort (vl, n, sizeof *vl, compare_var_ptrs_by_name, NULL); - - display_variables (vl, n, as); - + msg (SW, _("No variables to display.")); free (vl); + + if (flags & (DF_ATTRIBUTES | DF_AT_ATTRIBUTES)) + display_data_file_attributes (dict_get_attributes (dataset_dict (ds)), + flags); } return lex_end_of_command (lexer); @@ -333,7 +332,7 @@ display_documents (const struct dictionary *dict) } } -static int _as; +static int _flags; /* Sets the widths of all the columns and heights of all the rows in table T for driver D. */ @@ -344,14 +343,16 @@ variables_dim (struct tab_table *t, struct outp_driver *d) int i; t->w[0] = tab_natural_width (t, d, 0); - if (_as == AS_DICTIONARY || _as == AS_VARIABLES || _as == AS_LABELS) + if (_flags & (DF_VALUE_LABELS | DF_VARIABLE_LABELS | DF_MISSING_VALUES + | DF_AT_ATTRIBUTES | DF_ATTRIBUTES)) { t->w[1] = MAX (tab_natural_width (t, d, 1), d->prop_em_width * 5); t->w[2] = MAX (tab_natural_width (t, d, 2), d->prop_em_width * 35); pc = 3; } - else pc = 1; - if (_as != AS_NAMES) + else + pc = 1; + if (_flags & DF_DICT_INDEX) t->w[pc] = tab_natural_width (t, d, pc); for (i = 0; i < t->nr; i++) @@ -359,155 +360,220 @@ variables_dim (struct tab_table *t, struct outp_driver *d) } static void -display_variables (const struct variable **vl, size_t n, int as) +display_variables (const struct variable **vl, size_t n, int flags) { - const struct variable **vp = vl; /* Variable pointer. */ struct tab_table *t; int nc; /* Number of columns. */ - int nr; /* Number of rows. 
*/ int pc; /* `Position column' */ int r; /* Current row. */ size_t i; - _as = as; - switch (as) - { - case AS_INDEX: - nc = 2; - break; - case AS_NAMES: - nc = 1; - break; - default: - nc = 4; - break; - } + _flags = flags; + + /* One column for the name, + two columns for general description, + one column for dictionary index. */ + nc = 1; + if (flags & ~DF_DICT_INDEX) + nc += 2; + pc = nc; + if (flags & DF_DICT_INDEX) + nc++; t = tab_create (nc, n + 5, 1); tab_headers (t, 0, 0, 1, 0); - nr = n + 5; tab_hline (t, TAL_2, 0, nc - 1, 1); tab_text (t, 0, 0, TAB_LEFT | TAT_TITLE, _("Variable")); - pc = (as == AS_INDEX ? 1 : 3); - if (as != AS_NAMES) + if (flags & ~DF_DICT_INDEX) + tab_joint_text (t, 1, 0, 2, 0, TAB_LEFT | TAT_TITLE, + (flags & ~(DF_DICT_INDEX | DF_VARIABLE_LABELS) + ? _("Description") : _("Label"))); + if (flags & DF_DICT_INDEX) tab_text (t, pc, 0, TAB_LEFT | TAT_TITLE, _("Position")); - if (as == AS_DICTIONARY || as == AS_VARIABLES) - tab_joint_text (t, 1, 0, 2, 0, TAB_LEFT | TAT_TITLE, _("Description")); - else if (as == AS_LABELS) - tab_joint_text (t, 1, 0, 2, 0, TAB_LEFT | TAT_TITLE, _("Label")); tab_dim (t, variables_dim); - for (i = r = 1; i <= n; i++) - { - const struct variable *v; - - while (*vp == NULL) - vp++; - v = *vp++; - - if (as == AS_DICTIONARY || as == AS_VARIABLES) - { - int nvl = val_labs_count (var_get_value_labels (v)); - - if (r + 13 + nvl > nr) - { - nr = MAX (nr * n / (i + 1), nr); - nr += 10 + nvl; - tab_realloc (t, nc, nr); - } - - r = describe_variable (v, t, r, as); - } else { - tab_text (t, 0, r, TAB_LEFT, var_get_name (v)); - if (as == AS_LABELS) - { - const char *label = var_get_label (v); - tab_joint_text (t, 1, r, 2, r, TAB_LEFT, - label == NULL ? "(no label)" : label); - } - if (as != AS_NAMES) - { - tab_text (t, pc, r, TAT_PRINTF, "%zu", - var_get_dict_index (v) + 1); - tab_hline (t, TAL_1, 0, nc - 1, r); - } - r++; - } - } - tab_hline (t, as == AS_NAMES ? 
TAL_1 : TAL_2, 0, nc - 1, 1); - if (as != AS_NAMES) + r = 1; + for (i = 0; i < n; i++) + r = describe_variable (vl[i], t, r, pc, flags); + tab_hline (t, flags & ~DF_DICT_INDEX ? TAL_2 : TAL_1, 0, nc - 1, 1); + if (flags) { tab_box (t, TAL_1, TAL_1, -1, -1, 0, 0, nc - 1, r - 1); tab_vline (t, TAL_1, 1, 0, r - 1); } else tab_flags (t, SOMF_NO_TITLE); - if (as == AS_DICTIONARY || as == AS_VARIABLES || as == AS_LABELS) - tab_vline (t, TAL_1, 3, 0, r - 1); + if (flags & ~DF_DICT_INDEX) + tab_vline (t, TAL_1, nc - 1, 0, r - 1); tab_resize (t, -1, r); tab_columns (t, TAB_COL_DOWN, 1); tab_submit (t); } -/* Puts a description of variable V into table T starting at row R. - The variable will be described in the format AS. Returns the next - row available for use in the table. */ +static bool +is_at_name (const char *name) +{ + return name[0] == '@' || (name[0] == '$' && name[1] == '@'); +} + +static size_t +count_attributes (const struct attrset *set, int flags) +{ + struct attrset_iterator i; + struct attribute *attr; + size_t n_attrs; + + n_attrs = 0; + for (attr = attrset_first (set, &i); attr != NULL; + attr = attrset_next (set, &i)) + if (flags & DF_AT_ATTRIBUTES || !is_at_name (attribute_get_name (attr))) + n_attrs += attribute_get_n_values (attr); + return n_attrs; +} + +static void +display_attributes (struct tab_table *t, const struct attrset *set, int flags, + int c, int r) +{ + struct attrset_iterator i; + struct attribute *attr; + + for (attr = attrset_first (set, &i); attr != NULL; + attr = attrset_next (set, &i)) + { + const char *name = attribute_get_name (attr); + size_t n_values; + size_t i; + + if (!(flags & DF_AT_ATTRIBUTES) && is_at_name (name)) + continue; + + n_values = attribute_get_n_values (attr); + for (i = 0; i < n_values; i++) + { + if (n_values > 1) + tab_text (t, c, r, TAB_LEFT | TAT_PRINTF, "%s[%d]", + name, i + 1); + else + tab_text (t, c, r, TAB_LEFT, name); + tab_text (t, c + 1, r, TAB_LEFT, attribute_get_value (attr, i)); + r++; + } + } +} 
+ +static void +display_data_file_attributes (struct attrset *set, int flags) +{ + struct tab_table *t; + size_t n_attrs; + + n_attrs = count_attributes (set, flags); + if (!n_attrs) + return; + + t = tab_create (2, n_attrs + 1, 0); + tab_headers (t, 0, 0, 1, 0); + tab_box (t, TAL_1, TAL_1, -1, TAL_1, 0, 0, tab_nc (t) - 1, tab_nr (t) - 1); + tab_hline (t, TAL_2, 0, 1, 1); + tab_text (t, 0, 0, TAB_LEFT | TAT_TITLE, _("Attribute")); + tab_text (t, 1, 0, TAB_LEFT | TAT_TITLE, _("Value")); + display_attributes (t, set, flags, 0, 1); + tab_columns (t, TAB_COL_DOWN, 1); + tab_dim (t, tab_natural_dimensions); + tab_title (t, "Custom data file attributes."); + tab_submit (t); +} + +/* Puts a description of variable V into table T starting at row + R. The variable will be described in the format given by + FLAGS. Returns the next row available for use in the + table. */ static int -describe_variable (const struct variable *v, struct tab_table *t, int r, int as) +describe_variable (const struct variable *v, struct tab_table *t, int r, + int pc, int flags) { - const struct fmt_spec *print = var_get_print_format (v); - const struct fmt_spec *write = var_get_write_format (v); - enum measure m = var_get_measure (v); - enum alignment a = var_get_alignment (v); + size_t n_attrs = 0; + int need_rows; + + /* Make sure that enough rows are allocated. */ + need_rows = 1; + if (flags & ~(DF_DICT_INDEX | DF_VARIABLE_LABELS)) + need_rows += 15; + if (flags & DF_VALUE_LABELS) + need_rows += val_labs_count (var_get_value_labels (v)); + if (flags & (DF_ATTRIBUTES | DF_AT_ATTRIBUTES)) + { + n_attrs = count_attributes (var_get_attributes (v), flags); + need_rows += n_attrs; + } + if (r + need_rows > tab_nr (t)) + { + int nr = MAX (r + need_rows, tab_nr (t) * 2); + tab_realloc (t, -1, nr); + } /* Put the name, var label, and position into the first row. 
*/ tab_text (t, 0, r, TAB_LEFT, var_get_name (v)); - tab_text (t, 3, r, TAT_PRINTF, "%zu", var_get_dict_index (v) + 1); + if (flags & DF_DICT_INDEX) + tab_text (t, pc, r, TAT_PRINTF, "%zu", var_get_dict_index (v) + 1); - if (as == AS_DICTIONARY && var_has_label (v)) + if (flags & DF_VARIABLE_LABELS && var_has_label (v)) { tab_joint_text (t, 1, r, 2, r, TAB_LEFT, var_get_label (v)); r++; } /* Print/write format, or print and write formats. */ - if (fmt_equal (print, write)) + if (flags & DF_FORMATS) { - char str[FMT_STRING_LEN_MAX + 1]; - tab_joint_text (t, 1, r, 2, r, TAB_LEFT | TAT_PRINTF, _("Format: %s"), - fmt_to_string (print, str)); - r++; + const struct fmt_spec *print = var_get_print_format (v); + const struct fmt_spec *write = var_get_write_format (v); + + if (fmt_equal (print, write)) + { + char str[FMT_STRING_LEN_MAX + 1]; + tab_joint_text (t, 1, r, 2, r, TAB_LEFT | TAT_PRINTF, + _("Format: %s"), fmt_to_string (print, str)); + r++; + } + else + { + char str[FMT_STRING_LEN_MAX + 1]; + tab_joint_text (t, 1, r, 2, r, TAB_LEFT | TAT_PRINTF, + _("Print Format: %s"), fmt_to_string (print, str)); + r++; + tab_joint_text (t, 1, r, 2, r, TAB_LEFT | TAT_PRINTF, + _("Write Format: %s"), fmt_to_string (write, str)); + r++; + } } - else + + /* Measurement level, display width, alignment. */ + if (flags & DF_MISC) { - char str[FMT_STRING_LEN_MAX + 1]; + enum measure m = var_get_measure (v); + enum alignment a = var_get_alignment (v); + tab_joint_text (t, 1, r, 2, r, TAB_LEFT | TAT_PRINTF, - _("Print Format: %s"), fmt_to_string (print, str)); + _("Measure: %s"), + m == MEASURE_NOMINAL ? _("Nominal") + : m == MEASURE_ORDINAL ? _("Ordinal") + : _("Scale")); r++; tab_joint_text (t, 1, r, 2, r, TAB_LEFT | TAT_PRINTF, - _("Write Format: %s"), fmt_to_string (write, str)); + _("Display Alignment: %s"), + a == ALIGN_LEFT ? _("Left") + : a == ALIGN_CENTRE ? 
_("Center") + : _("Right")); + r++; + tab_joint_text (t, 1, r, 2, r, TAB_LEFT | TAT_PRINTF, + _("Display Width: %d"), var_get_display_width (v)); r++; } - - /* Measurement level, display width, alignment. */ - tab_joint_text (t, 1, r, 2, r, TAB_LEFT | TAT_PRINTF, - _("Measure: %s"), - m == MEASURE_NOMINAL ? _("Nominal") - : m == MEASURE_ORDINAL ? _("Ordinal") - : _("Scale")); - r++; - tab_joint_text (t, 1, r, 2, r, TAB_LEFT | TAT_PRINTF, - _("Display Alignment: %s"), - a == ALIGN_LEFT ? _("Left") - : a == ALIGN_CENTRE ? _("Center") - : _("Right")); - r++; - tab_joint_text (t, 1, r, 2, r, TAB_LEFT | TAT_PRINTF, - _("Display Width: %d"), var_get_display_width (v)); - r++; - + /* Missing values if any. */ - if (var_has_missing_values (v)) + if (flags & DF_MISSING_VALUES && var_has_missing_values (v)) { char buf[128]; char *cp; @@ -552,7 +618,7 @@ describe_variable (const struct variable *v, struct tab_table *t, int r, int as) } /* Value labels. */ - if (as == AS_DICTIONARY && var_has_value_labels (v)) + if (flags & DF_VALUE_LABELS && var_has_value_labels (v)) { const struct val_labs *val_labs = var_get_value_labels (v); struct val_labs_iterator *i; @@ -587,8 +653,17 @@ describe_variable (const struct variable *v, struct tab_table *t, int r, int as) tab_vline (t, TAL_1, 2, orig_r, r - 1); } + if (flags & (DF_ATTRIBUTES | DF_AT_ATTRIBUTES) && n_attrs) + { + tab_joint_text (t, 1, r, 2, r, TAB_LEFT, "Custom attributes:"); + r++; + + display_attributes (t, var_get_attributes (v), flags, 1, r); + r += n_attrs; + } + /* Draw a line below the last row of information on this variable. 
*/ - tab_hline (t, TAL_1, 0, 3, r); + tab_hline (t, TAL_1, 0, tab_nc (t) - 1, r); return r; } diff --git a/tests/automake.mk b/tests/automake.mk index dfd49170..d8dd300e 100644 --- a/tests/automake.mk +++ b/tests/automake.mk @@ -8,6 +8,7 @@ TESTS_ENVIRONMENT += CHARSETALIASDIR='$(abs_top_builddir)/gl' dist_TESTS = \ tests/command/aggregate.sh \ + tests/command/attributes.sh \ tests/command/autorecod.sh \ tests/command/beg-data.sh \ tests/command/bignum.sh \ diff --git a/tests/command/attributes.sh b/tests/command/attributes.sh new file mode 100755 index 00000000..d4d36e60 --- /dev/null +++ b/tests/command/attributes.sh @@ -0,0 +1,148 @@ +#!/bin/sh + +# This program tests VARIABLE ATTRIBUTE and DATAFILE ATTRIBUTE +# commands, including the ability to write attributes to system files +# and read them back in again. + +TEMPDIR=/tmp/pspp-tst-$$ +TESTFILE=$TEMPDIR/`basename $0`.sps + +# ensure that top_builddir are absolute +if [ -z "$top_builddir" ] ; then top_builddir=. ; fi +if [ -z "$top_srcdir" ] ; then top_srcdir=. ; fi +top_builddir=`cd $top_builddir; pwd` +PSPP=$top_builddir/src/ui/terminal/pspp + +# ensure that top_srcdir is absolute +top_srcdir=`cd $top_srcdir; pwd` + +STAT_CONFIG_PATH=$top_srcdir/config +export STAT_CONFIG_PATH + + +cleanup() +{ + if [ x"$PSPP_TEST_NO_CLEANUP" != x ] ; then + echo "NOT cleaning $TEMPDIR" + return ; + fi + cd / + rm -rf $TEMPDIR +} + + +fail() +{ + echo $activity + echo FAILED + cleanup; + exit 1; +} + + +no_result() +{ + echo $activity + echo NO RESULT; + cleanup; + exit 2; +} + +pass() +{ + cleanup; + exit 0; +} + +mkdir -p $TEMPDIR + +cd $TEMPDIR + +activity="create program" +cat > $TESTFILE < 2") + ValidationRule[1]('a * b > 3') + /VARIABLES=c + ATTRIBUTE=QuestionWording('X or Y?'). +DISPLAY ATTRIBUTES. + +SAVE OUTFILE='attributes.sav'. +NEW FILE. +GET FILE='attributes.sav'. + +DATAFILE ATTRIBUTE + DELETE=Array[1] Array[2]. +VARIABLE ATTRIBUTE + VARIABLES=a + DELETE=ValidationRule + /VARIABLE=b + DELETE=validationrule[2]. 
+ +DISPLAY ATTRIBUTES. + +EOF +if [ $? -ne 0 ] ; then no_result ; fi + + +activity="run program" +$SUPERVISOR $PSPP --testing-mode $TESTFILE +if [ $? -ne 0 ] ; then no_result ; fi + +activity="compare output" +perl -pi -e 's/^\s*$//g' $TEMPDIR/pspp.list +diff -b -w $TEMPDIR/pspp.list - << EOF +1.1 DISPLAY. ++--------+-----------------+-----------------------------------+ +|Variable|Description | | +#========#=================#===================================# +|a |Custom attributes| | +| |ValidationRule[1]|a * b > 3 | +| |ValidationRule[2]|a + b > 2 | ++--------+-----------------+-----------------------------------+ +|b |Custom attributes| | +| |ValidationRule[1]|a * b > 3 | +| |ValidationRule[2]|a + b > 2 | ++--------+-----------------+-----------------------------------+ +|c |Custom attributes| | +| |QuestionWording |X or Y? | ++--------+-----------------+-----------------------------------+ +1.2 DISPLAY. Custom data file attributes. ++---------+---------------+ +|Attribute|Value | +#=========#===============# +|array[1] |array element 1| +|array[2] |array element 2| +|key |value | ++---------+---------------+ +2.1 DISPLAY. ++--------+---------------+-----------------------------------+ +|Variable|Description | | +#========#===============#===================================# +|b |Custom attribut|s: | +| |ValidationRule |a * b > 3 | ++--------+---------------+-----------------------------------+ +|c |Custom attribut|s: | +| |QuestionWording|X or Y? | ++--------+---------------+-----------------------------------+ +2.2 DISPLAY. Custom data file attributes. ++---------+---------------+ +|Attribute|Value | +#=========#===============# +|array |array element 2| +|key |value | ++---------+---------------+ +EOF +if [ $? -ne 0 ] ; then fail ; fi + +pass; diff --git a/tests/dissect-sysfile.c b/tests/dissect-sysfile.c index 53268423..25d01158 100644 --- a/tests/dissect-sysfile.c +++ b/tests/dissect-sysfile.c @@ -1,5 +1,5 @@ /* PSPP - a program for statistical analysis. 
- Copyright (C) 2007 Free Software Foundation, Inc. + Copyright (C) 2007, 2008 Free Software Foundation, Inc. This program is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -62,12 +62,18 @@ static void read_long_var_name_map (struct sfm_reader *r, size_t size, size_t count); static void read_long_string_map (struct sfm_reader *r, size_t size, size_t count); +static void read_datafile_attributes (struct sfm_reader *r, + size_t size, size_t count); +static void read_variable_attributes (struct sfm_reader *r, + size_t size, size_t count); -static struct variable_to_value_map *open_variable_to_value_map ( +static struct text_record *open_text_record ( struct sfm_reader *, size_t size); -static void close_variable_to_value_map (struct variable_to_value_map *); -static bool read_variable_to_value_map (struct variable_to_value_map *, - char **key, char **value); +static void close_text_record (struct text_record *); +static bool read_variable_to_value_pair (struct text_record *, + char **key, char **value); +static char *text_tokenize (struct text_record *, int delimiter); +static bool text_match (struct text_record *text, int c); static void usage (int exit_code); static void sys_warn (struct sfm_reader *, const char *, ...) 
@@ -87,50 +93,61 @@ int main (int argc, char *argv[]) { struct sfm_reader r; - int rec_type; + int i; set_program_name (argv[0]); - if (argc != 2) + if (argc < 2) usage (EXIT_FAILURE); - r.file_name = argv[1]; - r.file = fopen (r.file_name, "rb"); - if (r.file == NULL) - error (EXIT_FAILURE, errno, "error opening \"%s\"", r.file_name); - r.n_variable_records = 0; - r.n_variables = 0; - - read_header (&r); - while ((rec_type = read_int (&r)) != 999) + for (i = 1; i < argc; i++) { - switch (rec_type) + int rec_type; + + r.file_name = argv[i]; + r.file = fopen (r.file_name, "rb"); + if (r.file == NULL) + error (EXIT_FAILURE, errno, "error opening \"%s\"", r.file_name); + r.n_variable_records = 0; + r.n_variables = 0; + + if (argc > 2) + printf ("Reading \"%s\":\n", r.file_name); + + read_header (&r); + while ((rec_type = read_int (&r)) != 999) { - case 2: - read_variable_record (&r); - break; + switch (rec_type) + { + case 2: + read_variable_record (&r); + break; - case 3: - read_value_label_record (&r); - break; + case 3: + read_value_label_record (&r); + break; - case 4: - sys_error (&r, _("Misplaced type 4 record.")); + case 4: + sys_error (&r, _("Misplaced type 4 record.")); - case 6: - read_document_record (&r); - break; + case 6: + read_document_record (&r); + break; - case 7: - read_extension_record (&r); - break; + case 7: + read_extension_record (&r); + break; - default: - sys_error (&r, _("Unrecognized record type %d."), rec_type); + default: + sys_error (&r, _("Unrecognized record type %d."), rec_type); + } } - } - printf ("%08lx: end-of-dictionary record (first byte of data at %08lx)\n", - ftell (r.file), ftell (r.file) + 4); + printf ("%08lx: end-of-dictionary record " + "(first byte of data at %08lx)\n", + ftell (r.file), ftell (r.file) + 4); + fclose (r.file); + } + return 0; } @@ -486,9 +503,12 @@ read_extension_record (struct sfm_reader *r) break; case 17: - /* Text field that defines variable attributes. New in - SPSS 14. 
*/ - break; + read_datafile_attributes (r, size, count); + return; + + case 18: + read_variable_attributes (r, size, count); + return; default: sys_warn (r, _("Unrecognized record type 7, subtype %d."), subtype); @@ -613,15 +633,15 @@ read_display_parameters (struct sfm_reader *r, size_t size, size_t count) static void read_long_var_name_map (struct sfm_reader *r, size_t size, size_t count) { - struct variable_to_value_map *map; + struct text_record *text; char *var; char *long_name; printf ("%08lx: long variable names (short => long)\n", ftell (r->file)); - map = open_variable_to_value_map (r, size * count); - while (read_variable_to_value_map (map, &var, &long_name)) + text = open_text_record (r, size * count); + while (read_variable_to_value_pair (text, &var, &long_name)) printf ("\t%s => %s\n", var, long_name); - close_variable_to_value_map (map); + close_text_record (text); } /* Reads record type 7, subtype 14, which gives the real length @@ -629,89 +649,170 @@ read_long_var_name_map (struct sfm_reader *r, size_t size, size_t count) static void read_long_string_map (struct sfm_reader *r, size_t size, size_t count) { - struct variable_to_value_map *map; + struct text_record *text; char *var; char *length_s; printf ("%08lx: very long strings (variable => length)\n", ftell (r->file)); - map = open_variable_to_value_map (r, size * count); - while (read_variable_to_value_map (map, &var, &length_s)) + text = open_text_record (r, size * count); + while (read_variable_to_value_pair (text, &var, &length_s)) printf ("\t%s => %d\n", var, atoi (length_s)); - close_variable_to_value_map (map); + close_text_record (text); +} + +static bool +read_attributes (struct sfm_reader *r, struct text_record *text, + const char *variable) +{ + const char *key; + int index; + + for (;;) + { + key = text_tokenize (text, '('); + if (key == NULL) + return true; + + for (index = 1; ; index++) + { + /* Parse the value. 
*/ + const char *value = text_tokenize (text, '\n'); + if (value == NULL) + { + sys_warn (r, _("%s: Error parsing attribute value %s[%d]"), + variable, key, index); + return false; + } + if (strlen (value) < 2 + || value[0] != '\'' || value[strlen (value) - 1] != '\'') + sys_warn (r, _("%s: Attribute value %s[%d] is not quoted: %s"), + variable, key, index, value); + else + printf ("\t%s: %s[%d] = \"%.*s\"\n", + variable, key, index, (int) strlen (value) - 2, value + 1); + + /* Was this the last value for this attribute? */ + if (text_match (text, ')')) + break; + } + + if (text_match (text, '/')) + return true; + } +} + +static void +read_datafile_attributes (struct sfm_reader *r, size_t size, size_t count) +{ + struct text_record *text; + + printf ("%08lx: datafile attributes\n", ftell (r->file)); + text = open_text_record (r, size * count); + read_attributes (r, text, "datafile"); + close_text_record (text); +} + +static void +read_variable_attributes (struct sfm_reader *r, size_t size, size_t count) +{ + struct text_record *text; + + printf ("%08lx: variable attributes\n", ftell (r->file)); + text = open_text_record (r, size * count); + for (;;) + { + const char *variable = text_tokenize (text, ':'); + if (variable == NULL || !read_attributes (r, text, variable)) + break; + } + close_text_record (text); } -/* Helpers for reading records that contain "variable=value" - pairs. */ +/* Helpers for reading records that consist of structured text + strings. */ /* State. */ -struct variable_to_value_map +struct text_record { char *buffer; /* Record contents. */ size_t size; /* Size of buffer. */ size_t pos; /* Current position in buffer. */ }; -/* Reads SIZE bytes into a "variable=value" map for R, - and returns the map. */ -static struct variable_to_value_map * -open_variable_to_value_map (struct sfm_reader *r, size_t size) +/* Reads SIZE bytes into a text record for R, + and returns the new text record. 
*/ +static struct text_record * +open_text_record (struct sfm_reader *r, size_t size) { - struct variable_to_value_map *map = xmalloc (sizeof *map); + struct text_record *text = xmalloc (sizeof *text); char *buffer = xmalloc (size + 1); read_bytes (r, buffer, size); - map->buffer = buffer; - map->size = size; - map->pos = 0; - return map; + text->buffer = buffer; + text->size = size; + text->pos = 0; + return text; } -/* Closes MAP and frees its storage. - Not really needed, because the pool will free the map anyway, +/* Closes TEXT and frees its storage. + Not really needed, because the pool will free the text record anyway, but can be used to free it earlier. */ static void -close_variable_to_value_map (struct variable_to_value_map *map) +close_text_record (struct text_record *text) { - free (map); - free (map->buffer); + free (text->buffer); + free (text); } static char * -tokenize (struct variable_to_value_map *map, int delimiter) +text_tokenize (struct text_record *text, int delimiter) { - size_t start = map->pos; - while (map->pos < map->size - && map->buffer[map->pos] != delimiter - && map->buffer[map->pos] != '\0') - map->pos++; - if (map->pos == map->size) + size_t start = text->pos; + while (text->pos < text->size + && text->buffer[text->pos] != delimiter + && text->buffer[text->pos] != '\0') + text->pos++; + if (text->pos == text->size) return NULL; - map->buffer[map->pos++] = '\0'; - return &map->buffer[start]; + text->buffer[text->pos++] = '\0'; + return &text->buffer[start]; +} + +static bool +text_match (struct text_record *text, int c) +{ + if (text->pos < text->size && text->buffer[text->pos] == c) + { + text->pos++; + return true; + } + else + return false; } -/* Reads the next variable=value pair from MAP. +/* Reads a variable=value pair from TEXT. Looks up the variable in DICT and stores it into *VAR. Stores a null-terminated value into *VALUE. 
*/ static bool -read_variable_to_value_map (struct variable_to_value_map *map, - char **key, char **value) +read_variable_to_value_pair (struct text_record *text, + char **key, char **value) { - *key = tokenize (map, '='); - *value = tokenize (map, '\t'); + *key = text_tokenize (text, '='); + *value = text_tokenize (text, '\t'); if (!*key || !*value) return false; - while (map->pos < map->size - && (map->buffer[map->pos] == '\t' - || map->buffer[map->pos] == '\0')) - map->pos++; + while (text->pos < text->size + && (text->buffer[text->pos] == '\t' + || text->buffer[text->pos] == '\0')) + text->pos++; return true; } static void usage (int exit_code) { - printf ("usage: %s SYSFILE, where SYSFILE is the name of a system file\n", + printf ("usage: %s SYSFILE...\n" + "where each SYSFILE is the name of a system file\n", program_name); exit (exit_code); } -- 2.30.2