From 2a64e944e842caf6df8476f3bc7c815a332dc975 Mon Sep 17 00:00:00 2001 From: Ben Pfaff Date: Sat, 25 Feb 2023 17:48:46 -0800 Subject: [PATCH] Add support for variable sets in the system file format. Variable sets are primarily a GUI feature, but this commit doesn't implement them in the GUI. --- doc/dev/system-file-format.texi | 53 ++++++- src/data/automake.mk | 2 + src/data/dictionary.c | 125 ++++++++++++++-- src/data/dictionary.h | 10 ++ src/data/sys-file-reader.c | 59 +++++++- src/data/sys-file-writer.c | 46 ++++++ src/data/varset.c | 54 +++++++ src/data/varset.h | 42 ++++++ src/language/command.def | 1 + src/language/commands/sys-file-info.c | 52 +++++++ tests/data/sys-file-reader.at | 200 ++++++++++++++++++++++++++ utilities/pspp-dump-sav.c | 37 +++++ 12 files changed, 666 insertions(+), 15 deletions(-) create mode 100644 src/data/varset.c create mode 100644 src/data/varset.h diff --git a/doc/dev/system-file-format.texi b/doc/dev/system-file-format.texi index c8b37a584e..ba390c1c02 100644 --- a/doc/dev/system-file-format.texi +++ b/doc/dev/system-file-format.texi @@ -92,6 +92,7 @@ possible to artificially synthesize files that use different encodings * Multiple Response Sets Records:: * Extra Product Info Record:: * Variable Display Parameter Record:: +* Variable Sets Record:: * Long Variable Names Record:: * Very Long String Record:: * Character Encoding Record:: @@ -1043,6 +1044,54 @@ Centre aligned @end table @end table +@node Variable Sets Record +@section Variable Sets Record + +The SPSS GUI offers users the ability to arrange variables in sets. +Users may enable and disable sets individually, and the data editor +and analysis dialog boxes only show enabled sets. Syntax does not use +variable sets. 
+ +The variable sets record, if present, has the following format: + +@example +/* @r{Header.} */ +int32 rec_type; +int32 subtype; +int32 size; +int32 count; + +/* @r{Exactly @code{count} bytes of text.} */ +char text[]; +@end example + +@table @code +@item int32 rec_type; +Record type. Always set to 7. + +@item int32 subtype; +Record subtype. Always set to 5. + +@item int32 size; +Always set to 1. + +@item int32 count; +The total number of bytes in @code{text}. + +@item char text[]; +The variable sets, in a text-based format. + +Each variable set occupies one line of text, each of which ends with a +line feed (byte 0x0a), optionally preceded by a carriage return (byte +0x0d). + +Each line begins with the name of the variable set, followed by an +equals sign (@samp{=}) and a space (byte 0x20), followed by the long +variable names of the members of the set, separated by spaces. A +variable set may be empty, in which case the equals sign and the space +following it are still present. +@end table + @node Long Variable Names Record @section Long Variable Names Record @@ -1522,10 +1571,6 @@ The following extension record subtypes have also been observed, with the following believed meanings: @table @asis -@item 5 -A named variable set for use in the GUI (according to Aapi -H@"am@"al@"ainen). - @item 6 Date info, probably related to USE (according to Aapi H@"am@"al@"ainen). 
diff --git a/src/data/automake.mk b/src/data/automake.mk index dc7d696132..578ca724a3 100644 --- a/src/data/automake.mk +++ b/src/data/automake.mk @@ -133,6 +133,8 @@ src_data_libdata_la_SOURCES = \ src/data/vardict.h \ src/data/variable.h \ src/data/variable.c \ + src/data/varset.c \ + src/data/varset.h \ src/data/vector.c \ src/data/vector.h diff --git a/src/data/dictionary.c b/src/data/dictionary.c index 5563897014..d69f1c2901 100644 --- a/src/data/dictionary.c +++ b/src/data/dictionary.c @@ -32,6 +32,7 @@ #include "data/value-labels.h" #include "data/vardict.h" #include "data/variable.h" +#include "data/varset.h" #include "data/vector.h" #include "libpspp/array.h" #include "libpspp/assertion.h" @@ -78,6 +79,8 @@ struct dictionary struct attrset attributes; /* Custom attributes. */ struct mrset **mrsets; /* Multiple response sets. */ size_t n_mrsets; /* Number of multiple response sets. */ + struct varset **varsets; /* Variable sets. */ + size_t n_varsets; /* Number of variable sets. */ /* Whether variable names must be valid identifiers. Normally, this is true, but sometimes a dictionary is prepared for external use @@ -96,6 +99,7 @@ struct dictionary static void dict_unset_split_var (struct dictionary *, struct variable *, bool); static void dict_unset_mrset_var (struct dictionary *, struct variable *); +static void dict_unset_varset_var (struct dictionary *, struct variable *); /* Compares two double pointers to variables, which should point to elements of a struct dictionary's `var' member array. 
*/
@@ -299,13 +303,10 @@ dict_create (const char *encoding)
 struct dictionary *
 dict_clone (const struct dictionary *s)
 {
-  struct dictionary *d;
-  size_t i;
-
-  d = dict_create (s->encoding);
+  struct dictionary *d = dict_create (s->encoding);
   dict_set_names_must_be_ids (d, dict_get_names_must_be_ids (s));
 
-  for (i = 0; i < s->n_vars; i++)
+  for (size_t i = 0; i < s->n_vars; i++)
     {
       struct variable *sv = s->vars[i].var;
       struct variable *dv = dict_clone_var_assert (d, sv);
@@ -322,7 +323,7 @@ dict_clone (const struct dictionary *s)
   if (d->n_splits > 0)
     {
       d->split = xnmalloc (d->n_splits, sizeof *d->split);
-      for (i = 0; i < d->n_splits; i++)
+      for (size_t i = 0; i < d->n_splits; i++)
         d->split[i] = dict_lookup_var_assert (d, var_get_name (s->split[i]));
     }
   d->split_type = s->split_type;
@@ -339,12 +340,12 @@ dict_clone (const struct dictionary *s)
 
   d->n_vectors = s->n_vectors;
   d->vector = xnmalloc (d->n_vectors, sizeof *d->vector);
-  for (i = 0; i < s->n_vectors; i++)
+  for (size_t i = 0; i < s->n_vectors; i++)
     d->vector[i] = vector_clone (s->vector[i], s, d);
 
   dict_set_attributes (d, dict_get_attributes (s));
 
-  for (i = 0; i < s->n_mrsets; i++)
+  for (size_t i = 0; i < s->n_mrsets; i++)
     {
       const struct mrset *old = s->mrsets[i];
       struct mrset *new;
@@ -358,10 +359,20 @@ dict_clone (const struct dictionary *s)
       dict_add_mrset (d, new);
     }
 
-  return d;
-}
+  for (size_t i = 0; i < s->n_varsets; i++)
+    {
+      const struct varset *old = s->varsets[i];
+
+      /* Clone old varset, then replace vars from S by vars from D. */
+      struct varset *new = varset_clone (old);
+      for (size_t j = 0; j < new->n_vars; j++)
+        new->vars[j] = dict_lookup_var_assert (d, var_get_name (new->vars[j]));
+      dict_add_varset (d, new);
+    }
 
+  return d;
+}
 
 /* Returns the SPLIT FILE vars (see cmd_split_file()).
Call dict_get_n_splits() to determine how many SPLIT FILE vars
@@ -482,6 +493,7 @@ dict_delete_var__ (struct dictionary *d, struct variable *v, bool skip_callbacks
 
   dict_unset_split_var (d, v, skip_callbacks);
   dict_unset_mrset_var (d, v);
+  dict_unset_varset_var (d, v);
 
   if (d->weight == v)
     dict_set_weight (d, NULL);
@@ -578,6 +590,7 @@ dict_delete_consecutive_vars (struct dictionary *d, size_t idx, size_t count)
 
       dict_unset_split_var (d, v, false);
       dict_unset_mrset_var (d, v);
+      dict_unset_varset_var (d, v);
 
       if (d->weight == v)
         dict_set_weight (d, NULL);
@@ -694,6 +707,7 @@ _dict_destroy (struct dictionary *d)
   hmap_destroy (&d->name_map);
   attrset_destroy (&d->attributes);
   dict_clear_mrsets (d);
+  dict_clear_varsets (d);
   free (d->encoding);
   free (d);
 }
@@ -1804,6 +1818,118 @@ dict_unset_mrset_var (struct dictionary *dict, struct variable *var)
     }
 }
 
+
+/* Returns the variable set in DICT with index IDX, which must be between 0 and
+   the count returned by dict_get_n_varsets(), exclusive. */
+const struct varset *
+dict_get_varset (const struct dictionary *dict, size_t idx)
+{
+  assert (idx < dict->n_varsets);
+  return dict->varsets[idx];
+}
+
+/* Returns the number of variable sets in DICT. */
+size_t
+dict_get_n_varsets (const struct dictionary *dict)
+{
+  return dict->n_varsets;
+}
+
+/* Looks for a variable set named NAME in DICT, ignoring case.  If it finds
+   one, returns its index; otherwise, returns SIZE_MAX. */
+static size_t
+dict_lookup_varset_idx (const struct dictionary *dict, const char *name)
+{
+  for (size_t i = 0; i < dict->n_varsets; i++)
+    if (!utf8_strcasecmp (name, dict->varsets[i]->name))
+      return i;
+
+  return SIZE_MAX;
+}
+
+/* Looks for a variable set named NAME in DICT.  If it finds one, returns it;
+   otherwise, returns NULL. */
+const struct varset *
+dict_lookup_varset (const struct dictionary *dict, const char *name)
+{
+  size_t idx = dict_lookup_varset_idx (dict, name);
+  return idx != SIZE_MAX ? dict->varsets[idx] : NULL;
+}
+
+/* Adds VARSET to DICT, replacing (and destroying) any existing set with the
+   same name.  Returns true if VARSET was added as a new set, false if it
+   replaced an existing one.
+
+   Ownership of VARSET is transferred to DICT. */
+bool
+dict_add_varset (struct dictionary *dict, struct varset *varset)
+{
+  size_t idx = dict_lookup_varset_idx (dict, varset->name);
+  if (idx == SIZE_MAX)
+    {
+      dict->varsets = xrealloc (dict->varsets,
+                               (dict->n_varsets + 1) * sizeof *dict->varsets);
+      dict->varsets[dict->n_varsets++] = varset;
+      return true;
+    }
+  else
+    {
+      varset_destroy (dict->varsets[idx]);
+      dict->varsets[idx] = varset;
+      return false;
+    }
+}
+
+/* Looks for a variable set named NAME in DICT.  If it finds one, removes it
+   from DICT, destroys it, and returns true; otherwise, returns false.
+
+   Deleting a set may change the indexes of other sets in DICT. */
+bool
+dict_delete_varset (struct dictionary *dict, const char *name)
+{
+  size_t idx = dict_lookup_varset_idx (dict, name);
+  if (idx == SIZE_MAX)
+    return false;
+
+  varset_destroy (dict->varsets[idx]);
+  remove_element (dict->varsets, dict->n_varsets--, sizeof *dict->varsets,
+                  idx);
+  return true;
+}
+
+/* Deletes all variable sets from DICT. */
+void
+dict_clear_varsets (struct dictionary *dict)
+{
+  for (size_t i = 0; i < dict->n_varsets; i++)
+    varset_destroy (dict->varsets[i]);
+  free (dict->varsets);
+  dict->varsets = NULL;
+  dict->n_varsets = 0;
+}
+
+/* Removes VAR, which must be in DICT, from every variable set in DICT that
+   contains it.  A set that becomes empty as a result is retained. */
+static void
+dict_unset_varset_var (struct dictionary *dict, struct variable *var)
+{
+  assert (dict_contains_var (dict, var));
+
+  for (size_t i = 0; i < dict->n_varsets; i++)
+    {
+      struct varset *varset = dict->varsets[i];
+
+      /* J advances only when nothing is removed, because after a removal
+         slot J holds an element (if any remain) not examined yet. */
+      for (size_t j = 0; j < varset->n_vars;)
+        if (varset->vars[j] == var)
+          remove_element (varset->vars, varset->n_vars--,
+                          sizeof *varset->vars, j);
+        else
+          j++;
+    }
+}
+
 /* Returns D's attribute set.  The caller may examine or modify
    the attribute set, but must not destroy it.  Destroying D or
    calling dict_set_attributes for D will also destroy D's
diff --git a/src/data/dictionary.h b/src/data/dictionary.h
index e1d67ee704..9a4e8bf1c9 100644
--- a/src/data/dictionary.h
+++ b/src/data/dictionary.h
@@ -181,6 +181,16 @@ bool dict_add_mrset (struct dictionary *, struct mrset *);
 bool dict_delete_mrset (struct dictionary *, const char *name);
 void dict_clear_mrsets (struct dictionary *);
 
+/* Variable sets.
*/ +const struct varset *dict_get_varset (const struct dictionary *, size_t idx); +size_t dict_get_n_varsets (const struct dictionary *); +const struct varset *dict_lookup_varset (const struct dictionary *, + const char *name); + +bool dict_add_varset (struct dictionary *, struct varset *); +bool dict_delete_varset (struct dictionary *, const char *name); +void dict_clear_varsets (struct dictionary *); + /* Attributes. */ struct attrset *dict_get_attributes (const struct dictionary *); void dict_set_attributes (struct dictionary *, const struct attrset *); diff --git a/src/data/sys-file-reader.c b/src/data/sys-file-reader.c index 66552dc4b4..880b62e384 100644 --- a/src/data/sys-file-reader.c +++ b/src/data/sys-file-reader.c @@ -41,6 +41,7 @@ #include "data/value-labels.h" #include "data/value.h" #include "data/variable.h" +#include "data/varset.h" #include "libpspp/array.h" #include "libpspp/assertion.h" #include "libpspp/compiler.h" @@ -371,6 +372,9 @@ static void parse_long_string_value_labels (struct sfm_reader *, static void parse_long_string_missing_values ( struct sfm_reader *, const struct sfm_extension_record *, struct dictionary *); +static void parse_var_sets (struct sfm_reader *, + const struct sfm_extension_record *, + struct dictionary *); /* Frees the strings inside INFO. */ void @@ -840,6 +844,8 @@ sfm_decode (struct any_reader *r_, const char *encoding, if (r->extensions[EXT_LONG_MISSING] != NULL) parse_long_string_missing_values (r, r->extensions[EXT_LONG_MISSING], dict); + if (r->extensions[EXT_VAR_SETS]) + parse_var_sets (r, r->extensions[EXT_VAR_SETS], dict); /* Warn if the actual amount of data per case differs from the amount that the header claims. SPSS version 13 gets this @@ -1284,6 +1290,7 @@ read_extension_record (struct sfm_reader *r, int subtype, /* Implemented record types. 
*/
       { EXT_INTEGER, 4, 8 },
       { EXT_FLOAT, 8, 3 },
+      { EXT_VAR_SETS, 1, 0 },
       { EXT_MRSETS, 1, 0 },
       { EXT_PRODUCT_INFO, 1, 0 },
       { EXT_DISPLAY, 4, 0 },
@@ -1298,7 +1305,6 @@ read_extension_record (struct sfm_reader *r, int subtype,
       { EXT_LONG_MISSING, 1, 0 },
 
       /* Ignored record types. */
-      { EXT_VAR_SETS, 0, 0 },
       { EXT_DATE, 0, 0 },
       { EXT_DATA_ENTRY, 0, 0 },
       { EXT_DATAVIEW, 0, 0 },
@@ -2719,6 +2725,71 @@ parse_long_string_missing_values (struct sfm_reader *r,
       var_set_missing_values (var, &mv);
     }
 }
+
+/* Parses the variable sets in RECORD (an extension record of subtype 5) and
+   adds each of them to DICT.
+
+   RECORD's text holds one set per line, in the form "NAME= VAR VAR...".
+   Members that do not name a variable in DICT are dropped with a warning. */
+static void
+parse_var_sets (struct sfm_reader *r,
+                const struct sfm_extension_record *record,
+                struct dictionary *dict)
+{
+  struct text_record *text = open_text_record (r, record, true);
+  for (;;)
+    {
+      char *varset_name = text_get_token (text, ss_cstr ("="), NULL);
+      if (!varset_name)
+        break;
+
+      struct varset *varset = xmalloc (sizeof *varset);
+      *varset = (struct varset) {
+        .name = xstrdup (varset_name),
+      };
+
+      /* A space normally follows the equals sign; skip it if present. */
+      text_match (text, ' ');
+
+      /* NOTE(review): this assumes that text_get_token() never returns the
+         next line's "NAME=" as a member, e.g. after an empty set -- confirm
+         against its handling of leading delimiters. */
+      size_t allocated_vars = 0;
+      char delimiter;
+      do
+        {
+          char *var_name = text_get_token (text, ss_cstr (" \n"), &delimiter);
+          if (!var_name)
+            break;
+
+          /* Drop the carriage return of a CR LF line ending.  If nothing
+             is left, the token was just a line ending (e.g. of an empty
+             set) rather than a variable name, so don't warn about it. */
+          size_t len = strlen (var_name);
+          if (len > 0 && var_name[len - 1] == '\r')
+            var_name[--len] = '\0';
+          if (len == 0)
+            continue;
+
+          struct variable *var = dict_lookup_var (dict, var_name);
+          if (var)
+            {
+              if (varset->n_vars >= allocated_vars)
+                varset->vars = x2nrealloc (varset->vars, &allocated_vars,
+                                           sizeof *varset->vars);
+              varset->vars[varset->n_vars++] = var;
+            }
+          else
+            sys_warn (r, record->pos,
+                      _("Variable set %s contains unknown variable %s."),
+                      varset_name, var_name);
+        }
+      while (delimiter == ' ');
+
+      dict_add_varset (dict, varset);
+    }
+  close_text_record (r, text);
+}
 
 /* Case reader.
*/
diff --git a/src/data/sys-file-writer.c b/src/data/sys-file-writer.c
index 33215241c2..34f420e4a4 100644
--- a/src/data/sys-file-writer.c
+++ b/src/data/sys-file-writer.c
@@ -41,6 +41,7 @@
 #include "data/short-names.h"
 #include "data/value-labels.h"
 #include "data/variable.h"
+#include "data/varset.h"
 #include "libpspp/float-format.h"
 #include "libpspp/i18n.h"
 #include "libpspp/integer-format.h"
@@ -132,6 +133,7 @@ static void write_long_string_value_labels (struct sfm_writer *,
 static void write_long_string_missing_values (struct sfm_writer *,
                                               const struct dictionary *);
 
+static void write_varsets (struct sfm_writer *, const struct dictionary *);
 static void write_mrsets (struct sfm_writer *, const struct dictionary *,
                           bool pre_v14);
 
@@ -275,6 +277,7 @@ sfm_open_writer (struct file_handle *fh, struct dictionary *d,
   write_integer_info_record (w, d);
   write_float_info_record (w);
 
+  write_varsets (w, d);
   write_mrsets (w, d, true);
 
   write_variable_display_parameters (w, d);
@@ -804,6 +807,52 @@ write_variable_attributes (struct sfm_writer *w, const struct dictionary *d)
   ds_destroy (&s);
 }
 
+/* Writes DICT's variable sets, if it has any, to W as an extension record of
+   subtype 5.  The record's text has one line per set, in the form
+   "NAME= VAR VAR...", with names recoded from UTF-8 into DICT's encoding. */
+static void
+write_varsets (struct sfm_writer *w, const struct dictionary *dict)
+{
+  const char *encoding = dict_get_encoding (dict);
+
+  if (is_encoding_ebcdic_compatible (encoding))
+    {
+      /* FIXME. */
+      return;
+    }
+
+  size_t n_varsets = dict_get_n_varsets (dict);
+  if (n_varsets == 0)
+    return;
+
+  struct string s = DS_EMPTY_INITIALIZER;
+  for (size_t i = 0; i < n_varsets; i++)
+    {
+      const struct varset *varset = dict_get_varset (dict, i);
+
+      /* The space after "=" is written even if the set is empty. */
+      char *set_name = recode_string (encoding, "UTF-8", varset->name, -1);
+      ds_put_format (&s, "%s= ", set_name);
+      free (set_name);
+
+      for (size_t j = 0; j < varset->n_vars; j++)
+        {
+          if (j)
+            ds_put_byte (&s, ' ');
+
+          const char *name_utf8 = var_get_name (varset->vars[j]);
+          char *var_name = recode_string (encoding, "UTF-8", name_utf8, -1);
+          ds_put_cstr (&s, var_name);
+          free (var_name);
+        }
+      ds_put_byte (&s, '\n');
+    }
+
+  if (!ds_is_empty (&s))
+    write_string_record (w, ds_ss (&s), 5);
+  ds_destroy (&s);
+}
+
 /* Write multiple response sets.  If PRE_V14 is true, writes sets supported by
    SPSS before release 14, otherwise writes sets supported only by later
    versions. */
diff --git a/src/data/varset.c b/src/data/varset.c
new file mode 100644
index 0000000000..1a2e04c0f2
--- /dev/null
+++ b/src/data/varset.c
@@ -0,0 +1,61 @@
+/* PSPP - a program for statistical analysis.
+   Copyright (C) 2023 Free Software Foundation, Inc.
+
+   This program is free software: you can redistribute it and/or modify
+   it under the terms of the GNU General Public License as published by
+   the Free Software Foundation, either version 3 of the License, or
+   (at your option) any later version.
+
+   This program is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+   GNU General Public License for more details.
+
+   You should have received a copy of the GNU General Public License
+   along with this program.  If not, see <http://www.gnu.org/licenses/>. */
+
+#include <config.h>
+
+#include "data/varset.h"
+
+#include <stdlib.h>
+
+#include "data/dictionary.h"
+
+#include "gl/xalloc.h"
+
+#include "gettext.h"
+#define _(msgid) gettext (msgid)
+
+/* Creates and returns a clone of OLD.  The clone has its own copy of the name
+   and of the member array, but its members are the same variables as OLD's.
+   The caller is responsible for freeing the new variable set (using
+   varset_destroy()). */
+struct varset *
+varset_clone (const struct varset *old)
+{
+  struct varset *new = xmalloc (sizeof *new);
+  *new = (struct varset) {
+    .name = xstrdup (old->name),
+    /* An empty set may have null VARS, and copying from a null pointer is
+       undefined even when the size is zero. */
+    .vars = (old->n_vars > 0
+             ? xmemdup (old->vars, old->n_vars * sizeof *old->vars)
+             : NULL),
+    .n_vars = old->n_vars,
+  };
+  return new;
+}
+
+/* Frees VARSET and the data that it contains.  Does not free the variables
+   that are VARSET's members.  Does nothing if VARSET is null. */
+void
+varset_destroy (struct varset *varset)
+{
+  if (varset)
+    {
+      free (varset->name);
+      free (varset->vars);
+      free (varset);
+    }
+}
diff --git a/src/data/varset.h b/src/data/varset.h
new file mode 100644
index 0000000000..67de8a5291
--- /dev/null
+++ b/src/data/varset.h
@@ -0,0 +1,42 @@
+/* PSPP - a program for statistical analysis.
+   Copyright (C) 2023 Free Software Foundation, Inc.
+
+   This program is free software: you can redistribute it and/or modify
+   it under the terms of the GNU General Public License as published by
+   the Free Software Foundation, either version 3 of the License, or
+   (at your option) any later version.
+
+   This program is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+   GNU General Public License for more details.
+
+   You should have received a copy of the GNU General Public License
+   along with this program.  If not, see <http://www.gnu.org/licenses/>. */
+
+#ifndef DATA_VARSET_H
+#define DATA_VARSET_H 1
+
+/* Variable set.
+
+   A variable set is a named set of variables.  The SPSS GUI allows users to
+   pick which variable sets are displayed in the data editor and dialog boxes.
+   */
+
+#include <stdbool.h>
+#include <stddef.h>
+
+struct dictionary;
+
+/* A variable set. */
+struct varset
+  {
+    char *name;                 /* UTF-8 encoded name. */
+    struct variable **vars;     /* Constituent variables. */
+    size_t n_vars;              /* Number of constituent variables.
*/
+  };
+
+struct varset *varset_clone (const struct varset *);
+void varset_destroy (struct varset *);
+
+#endif /* data/varset.h */
diff --git a/src/language/command.def b/src/language/command.def
index 0131b5dc7d..24ce236f56 100644
--- a/src/language/command.def
+++ b/src/language/command.def
@@ -71,6 +71,7 @@ DEF_CMD (S_DATA | S_INPUT_PROGRAM | S_NESTED_ANY, 0, "ADD DOCUMENT", cmd_add_doc
 DEF_CMD (S_DATA | S_INPUT_PROGRAM | S_NESTED_ANY, 0, "APPLY DICTIONARY", cmd_apply_dictionary)
 DEF_CMD (S_DATA | S_INPUT_PROGRAM | S_NESTED_ANY, 0, "DATAFILE ATTRIBUTE", cmd_datafile_attribute)
 DEF_CMD (S_DATA | S_INPUT_PROGRAM | S_NESTED_ANY, 0, "DISPLAY", cmd_display)
+DEF_CMD (S_DATA | S_INPUT_PROGRAM | S_NESTED_ANY, F_TESTING, "DISPLAY VARIABLE SETS", cmd_display_variable_sets)
 DEF_CMD (S_DATA | S_INPUT_PROGRAM | S_NESTED_ANY, 0, "DOCUMENT", cmd_document)
 DEF_CMD (S_DATA | S_INPUT_PROGRAM | S_NESTED_ANY, 0, "DROP DOCUMENTS", cmd_drop_documents)
 DEF_CMD (S_DATA | S_INPUT_PROGRAM | S_NESTED_ANY, 0, "FORMATS", cmd_formats)
diff --git a/src/language/commands/sys-file-info.c b/src/language/commands/sys-file-info.c
index 392e89b545..6611dcd957 100644
--- a/src/language/commands/sys-file-info.c
+++ b/src/language/commands/sys-file-info.c
@@ -30,6 +30,7 @@
 #include "data/missing-values.h"
 #include "data/value-labels.h"
 #include "data/variable.h"
+#include "data/varset.h"
 #include "data/vector.h"
 #include "language/command.h"
 #include "language/commands/file-handle.h"
@@ -455,6 +456,56 @@ cmd_display_macros (struct lexer *lexer, struct dataset *ds UNUSED)
   return CMD_SUCCESS;
 }
 
+/* DISPLAY VARIABLE SETS (registered with F_TESTING): outputs a pivot table
+   that lists, for each variable set in DS's dictionary, its members in
+   order, or just a note if the dictionary has no variable sets. */
+int
+cmd_display_variable_sets (struct lexer *lexer UNUSED, struct dataset *ds)
+{
+  const struct dictionary *dict = dataset_dict (ds);
+  size_t n_varsets = dict_get_n_varsets (dict);
+  if (!n_varsets)
+    {
+      msg (SN, _("No variable sets defined."));
+      return CMD_SUCCESS;
+    }
+
+  struct pivot_table *table = pivot_table_create (N_("Variable Sets"));
+  pivot_dimension_create (table, PIVOT_AXIS_COLUMN, N_("Attributes"),
+                          N_("Variable"));
+  struct pivot_dimension *sets = pivot_dimension_create (
+    table, PIVOT_AXIS_ROW, N_("Variable Set and Position"));
+  sets->root->show_label = true;
+
+  for (size_t set_idx = 0; set_idx < n_varsets; set_idx++)
+    {
+      const struct varset *set = dict_get_varset (dict, set_idx);
+      struct pivot_category *group = pivot_category_create_group__ (
+        sets->root, pivot_value_new_user_text (set->name, -1));
+
+      if (set->n_vars == 0)
+        {
+          /* An empty set still gets one placeholder row. */
+          int row = pivot_category_create_leaf (
+            group, pivot_value_new_user_text ("n/a", -1));
+          pivot_table_put2 (table, 0, row,
+                            pivot_value_new_text (N_("(empty)")));
+          continue;
+        }
+
+      for (size_t pos = 0; pos < set->n_vars; pos++)
+        {
+          int row = pivot_category_create_leaf (
+            group, pivot_value_new_integer (pos + 1));
+          pivot_table_put2 (table, 0, row,
+                            pivot_value_new_variable (set->vars[pos]));
+        }
+    }
+
+  pivot_table_submit (table);
+  return CMD_SUCCESS;
+}
+
 static char *
 get_documents_as_string (const struct dictionary *dict)
 {
diff --git a/tests/data/sys-file-reader.at b/tests/data/sys-file-reader.at
index 542f6978bf..4057109e1b 100644
--- a/tests/data/sys-file-reader.at
+++ b/tests/data/sys-file-reader.at
@@ -577,6 +577,115 @@ num1
 done
 AT_CLEANUP
 
+AT_SETUP([variable sets])
+AT_KEYWORDS([sack synthetic system file positive set])
+AT_DATA([sys-file.sack], [dnl
+dnl File header.
+"$FL2"; s60 "$(#) SPSS DATA FILE PSPP synthetic test file";
+2; dnl Layout code
+10; dnl Nominal case size
+0; dnl Not compressed
+0; dnl Not weighted
+0; dnl No cases.
+100.0; dnl Bias.
+"01 Jan 11"; "20:53:52"; s64 "PSPP synthetic test file"; +i8 0 *3; + +dnl Variable Set 1 +2; 0; 0; 0; 0x050800 *2; i8 0x82; i8 0xa0; s6 ""; +2; 0; 0; 0; 0x050800 *2; s8 "B"; +2; 0; 0; 0; 0x050800 *2; s8 "C"; + +dnl vs2 +2; 0; 0; 0; 0x050800 *2; s8 "D"; +2; 0; 0; 0; 0x050800 *2; s8 "E"; +2; 0; 0; 0; 0x050800 *2; s8 "F"; +2; 0; 0; 0; 0x050800 *2; s8 "G"; + +dnl c +2; 4; 0; 0; 0x010400 *2; s8 "H"; +2; 4; 0; 0; 0x010400 *2; s8 "I"; +2; 4; 0; 0; 0x010400 *2; s8 "J"; + +dnl Machine integer info record. +7; 3; 4; 8; 1; 2; 3; -1; 1; 1; ENDIAN; 932; + +7; 5; 1; +COUNT( + "Variable Set 1= "; i8 0x82; i8 0xa0; " b c"; i8 10; + "vs2=d e f g"; i8 10; + "c=h i j"; i8 13; i8 10; + "d= e g i b f"; i8 10; + "Empty Variable Set= "; i8 10); + +dnl Character encoding record. +7; 20; 1; 9; "shift_jis"; + +dnl Dictionary termination record. +999; 0; +]) +AT_DATA([expout], [dnl + Variables +╭────┬────────┬────────────┬────────────╮ +│Name│Position│Print Format│Write Format│ +├────┼────────┼────────────┼────────────┤ +│あ │ 1│F8.0 │F8.0 │ +│b │ 2│F8.0 │F8.0 │ +│c │ 3│F8.0 │F8.0 │ +│d │ 4│F8.0 │F8.0 │ +│e │ 5│F8.0 │F8.0 │ +│f │ 6│F8.0 │F8.0 │ +│g │ 7│F8.0 │F8.0 │ +│h │ 8│A4 │A4 │ +│i │ 9│A4 │A4 │ +│j │ 10│A4 │A4 │ +╰────┴────────┴────────────┴────────────╯ + + Variable Sets +╭──────────────────────────┬────────╮ +│Variable Set and Position │Variable│ +├──────────────────────────┼────────┤ +│Variable Set 1 1 │あ │ +│ 2 │b │ +│ 3 │c │ +├──────────────────────────┼────────┤ +│vs2 1 │d │ +│ 2 │e │ +│ 3 │f │ +│ 4 │g │ +├──────────────────────────┼────────┤ +│c 1 │h │ +│ 2 │i │ +│ 3 │j │ +├──────────────────────────┼────────┤ +│d 1 │e │ +│ 2 │g │ +│ 3 │i │ +│ 4 │b │ +│ 5 │f │ +├──────────────────────────┼────────┤ +│Empty Variable Set n/a │(empty) │ +╰──────────────────────────┴────────╯ +]) +AT_DATA([sys-file-1.sps], [dnl +GET FILE='sys-file.sav'. +DISPLAY VARIABLES. +DISPLAY VARIABLE SETS. +SAVE OUTFILE='sys-file-2.sav'. +]) +AT_DATA([sys-file-2.sps], [dnl +GET FILE='sys-file-2.sav'. 
+DISPLAY VARIABLES. +DISPLAY VARIABLE SETS. +]) +for variant in be le; do + AT_CHECK([sack --$variant sys-file.sack > sys-file.sav]) + AT_CHECK([rm -f sys-file-2.sav]) + AT_CHECK([pspp --testing-mode -O box=unicode sys-file-1.sps], [0], [expout]) + AT_CHECK([pspp --testing-mode -O box=unicode sys-file-2.sps], [0], [expout]) +done +AT_CLEANUP + AT_SETUP([multiple response sets]) AT_KEYWORDS([sack synthetic system file positive]) AT_DATA([sys-file.sack], [dnl @@ -2323,6 +2432,97 @@ for variant in be le; do done AT_CLEANUP +AT_SETUP([unknown variables in variable sets]) +AT_KEYWORDS([sack synthetic system file negative set]) +AT_DATA([sys-file.sack], [dnl +dnl File header. +"$FL2"; s60 "$(#) SPSS DATA FILE PSPP synthetic test file"; +2; dnl Layout code +10; dnl Nominal case size +0; dnl Not compressed +0; dnl Not weighted +0; dnl No cases. +100.0; dnl Bias. +"01 Jan 11"; "20:53:52"; s64 "PSPP synthetic test file"; +i8 0 *3; + +dnl Variable Set 1 +2; 0; 0; 0; 0x050800 *2; i8 0x82; i8 0xa0; s6 ""; +2; 0; 0; 0; 0x050800 *2; s8 "B"; +2; 0; 0; 0; 0x050800 *2; s8 "C"; + +dnl vs2 +2; 0; 0; 0; 0x050800 *2; s8 "D"; +2; 0; 0; 0; 0x050800 *2; s8 "E"; +2; 0; 0; 0; 0x050800 *2; s8 "F"; +2; 0; 0; 0; 0x050800 *2; s8 "G"; + +dnl c +2; 4; 0; 0; 0x010400 *2; s8 "H"; +2; 4; 0; 0; 0x010400 *2; s8 "I"; +2; 4; 0; 0; 0x010400 *2; s8 "J"; + +dnl Machine integer info record. +7; 3; 4; 8; 1; 2; 3; -1; 1; 1; ENDIAN; 932; + +7; 5; 1; +COUNT( + "Variable Set 1= "; i8 0x82; i8 0xa0; " "; >>"xyzzy"<<; " b c"; i8 10; + "vs2=d "; >>"foo"<<; " e f g"; i8 10;); + +dnl Character encoding record. +7; 20; 1; 9; "shift_jis"; + +dnl Dictionary termination record. +999; 0; +]) +for variant in be le; do + AT_CHECK([sack --$variant sys-file.sack > sys-file.sav]) + AT_DATA([sys-file.sps], [dnl +GET FILE='sys-file.sav'. +DISPLAY VARIABLES. +DISPLAY VARIABLE SETS. 
+]) + AT_CHECK([pspp --testing-mode -O box=unicode sys-file.sps], [0], [dnl +warning: `sys-file.sav' near offset 0x228: Variable set Variable Set 1 contains +unknown variable xyzzy. + +warning: `sys-file.sav' near offset 0x228: Variable set vs2 contains unknown +variable foo. + + Variables +╭────┬────────┬────────────┬────────────╮ +│Name│Position│Print Format│Write Format│ +├────┼────────┼────────────┼────────────┤ +│あ │ 1│F8.0 │F8.0 │ +│b │ 2│F8.0 │F8.0 │ +│c │ 3│F8.0 │F8.0 │ +│d │ 4│F8.0 │F8.0 │ +│e │ 5│F8.0 │F8.0 │ +│f │ 6│F8.0 │F8.0 │ +│g │ 7│F8.0 │F8.0 │ +│h │ 8│A4 │A4 │ +│i │ 9│A4 │A4 │ +│j │ 10│A4 │A4 │ +╰────┴────────┴────────────┴────────────╯ + + Variable Sets +╭──────────────────────────┬────────╮ +│Variable Set and Position │Variable│ +├──────────────────────────┼────────┤ +│Variable Set 1 1 │あ │ +│ 2 │b │ +│ 3 │c │ +├──────────────────────────┼────────┤ +│vs2 1 │d │ +│ 2 │e │ +│ 3 │f │ +│ 4 │g │ +╰──────────────────────────┴────────╯ +]) +done +AT_CLEANUP + AT_SETUP([bad mrsets name]) AT_KEYWORDS([sack synthetic system file negative multiple response]) AT_DATA([sys-file.sack], [dnl diff --git a/utilities/pspp-dump-sav.c b/utilities/pspp-dump-sav.c index 5f693c515b..7c261a93ba 100644 --- a/utilities/pspp-dump-sav.c +++ b/utilities/pspp-dump-sav.c @@ -76,6 +76,7 @@ static void read_machine_float_info (struct sfm_reader *, size_t size, size_t count); static void read_extra_product_info (struct sfm_reader *, size_t size, size_t count); +static void read_variable_sets (struct sfm_reader *, size_t size, size_t count); static void read_mrsets (struct sfm_reader *, size_t size, size_t count); static void read_display_parameters (struct sfm_reader *, size_t size, size_t count); @@ -607,6 +608,10 @@ read_extension_record (struct sfm_reader *r) read_machine_float_info (r, size, count); return; + case 5: + read_variable_sets (r, size, count); + return; + case 6: /* DATE variable information. We don't use it yet, but we should. 
*/
@@ -730,6 +735,46 @@ read_machine_float_info (struct sfm_reader *r, size_t size, size_t count)
             DBL_DIG + 1, lowest, lowest, "LOWEST");
 }
 
+/* Read record type 7, subtype 5: variable sets.  Prints one line per set
+   giving the set's name and, unless the set is empty, its members. */
+static void
+read_variable_sets (struct sfm_reader *r, size_t size, size_t count)
+{
+  printf ("%08llx: variable sets\n", (long long int) ftello (r->file));
+  struct text_record *text = open_text_record (r, size, count);
+  for (;;)
+    {
+      /* Skip any line feeds left over before the next set's name. */
+      while (text_match (text, '\n'))
+        continue;
+
+      const char *set = text_tokenize (text, '=');
+      if (!set)
+        break;
+
+      /* Always present even for an empty set. */
+      text_match (text, ' ');
+
+      char *variables = text_tokenize (text, '\n');
+      if (variables)
+        {
+          /* Drop the carriage return of a CR LF line ending.  A set whose
+             line holds nothing else is empty. */
+          size_t length = strlen (variables);
+          if (length > 0 && variables[length - 1] == '\r')
+            variables[--length] = '\0';
+          if (length == 0)
+            variables = NULL;
+        }
+
+      if (!variables)
+        printf ("\tset \"%s\" is empty\n", set);
+      else
+        printf ("\tset \"%s\" contains \"%s\"\n", set, variables);
+    }
+  close_text_record (text);
+}
+
 static void
 read_extra_product_info (struct sfm_reader *r, size_t size, size_t count)
-- 
2.30.2