From 6999c6d125665923b52ae15cbad83d06c92a8875 Mon Sep 17 00:00:00 2001 From: Ben Pfaff Date: Mon, 23 Jul 2007 05:05:45 +0000 Subject: [PATCH] Fix problems with uniqueness of short names in system files with very long string variables. Now a variable may have multiple short names. * automake.mk (src_data_libdata_a_SOURCES): Add new files short-names.c, short-names.h. * dictionary.c (dict_clone): Clone all the short names. (compare_strings): Move into short-names.c. (hash_strings): Ditto. (set_var_short_name_suffix): Ditto. (dict_assign_short_names): Ditto, rename short_names_assign, change to assign all short names. * por-file-writer.c (write_variables): Use short_names_assign instead of dict_assign_short_names. * short-names.c: New file. * short-names.h: New file. * sys-file-private.c (sfm_width_to_segments): New function. * sys-file-reader.c (read_long_var_name_map): Save and restore all the short names, not just the first one. * sys-file-writer.c (cont_var_name): Removed. (sfm_open_writer): Use short_names_assign instead of dict_assign_short_names. Use unique short names assigned by short_names_assign instead of those generated by cont_var_name. * variable.c (struct variable): Remove `short_name' member, replace by `short_names' and `short_name_cnt'. (var_create) Initialize new members. (var_get_short_name_cnt): New function. (var_get_short_name): Now takes an index argument. Changed most callers to pass 0. (var_set_short_name): Ditto. (var_clear_short_name): Renamed var_clear_short_names, changed to clear all short names. 
--- src/data/ChangeLog | 43 ++++++++ src/data/automake.mk | 2 + src/data/dictionary.c | 136 ++---------------------- src/data/dictionary.h | 2 - src/data/por-file-writer.c | 7 +- src/data/short-names.c | 204 ++++++++++++++++++++++++++++++++++++ src/data/short-names.h | 38 +++++++ src/data/sys-file-private.c | 12 +++ src/data/sys-file-private.h | 1 + src/data/sys-file-reader.c | 40 ++++--- src/data/sys-file-writer.c | 38 ++----- src/data/variable.c | 94 +++++++++++------ src/data/variable.h | 7 +- 13 files changed, 413 insertions(+), 211 deletions(-) create mode 100644 src/data/short-names.c create mode 100644 src/data/short-names.h diff --git a/src/data/ChangeLog b/src/data/ChangeLog index 9225fa00..516447db 100644 --- a/src/data/ChangeLog +++ b/src/data/ChangeLog @@ -1,3 +1,46 @@ +2007-07-22 Ben Pfaff + + Fix problems with uniqueness of short names in system files with + very long string variables. Now a variable may have multiple + short names. + + * automake.mk (src_data_libdata_a_SOURCES): Add new files + short-names.c, short-names.h. + + * dictionary.c (dict_clone): Clone all the short names. + (compare_strings): Move into short-names.c. + (hash_strings): Ditto. + (set_var_short_name_suffix): Ditto. + (dict_assign_short_names): Ditto, rename short_names_assign, + change to assign all short names. + + * por-file-writer.c (write_variables): Use short_names_assign + instead of dict_assign_short_names. + + * short-names.c: New file. + + * short-names.h: New file. + + * sys-file-private.c (sfm_width_to_segments): New function. + + * sys-file-reader.c (read_long_var_name_map): Save and restore all + the short names, not just the first one. + + * sys-file-writer.c (cont_var_name): Removed. + (sfm_open_writer): Use short_names_assign instead of + dict_assign_short_names. Use unique short names assigned by + short_names_assign instead of those generated by cont_var_name. 
+ + * variable.c (struct variable): Remove `short_name' member, + replace by `short_names' and `short_name_cnt'. + (var_create) Initialize new members. + (var_get_short_name_cnt): New function. + (var_get_short_name): Now takes an index argument. Changed most + callers to pass 0. + (var_set_short_name): Ditto. + (var_clear_short_name): Renamed var_clear_short_names, changed to + clear all short names. + 2007-07-22 Ben Pfaff * variable.c (var_set_width): Use new var_set_width function. diff --git a/src/data/automake.mk b/src/data/automake.mk index bc056531..70eb15b1 100644 --- a/src/data/automake.mk +++ b/src/data/automake.mk @@ -66,6 +66,8 @@ src_data_libdata_a_SOURCES = \ src/data/scratch-writer.h \ src/data/settings.c \ src/data/settings.h \ + src/data/short-names.c \ + src/data/short-names.h \ src/data/sparse-cases.c \ src/data/sparse-cases.h \ src/data/sys-file-private.c \ diff --git a/src/data/dictionary.c b/src/data/dictionary.c index 44504ed9..cee678e4 100644 --- a/src/data/dictionary.c +++ b/src/data/dictionary.c @@ -114,7 +114,10 @@ dict_clone (const struct dictionary *s) { struct variable *sv = s->var[i]; struct variable *dv = dict_clone_var_assert (d, sv, var_get_name (sv)); - var_set_short_name (dv, var_get_short_name (sv)); + size_t i; + + for (i = 0; i < var_get_short_name_cnt (sv); i++) + var_set_short_name (dv, i, var_get_short_name (sv, i)); } d->next_value_idx = s->next_value_idx; @@ -624,7 +627,7 @@ dict_rename_var (struct dictionary *d, struct variable *v, hsh_force_insert (d->name_tab, v); if (get_algorithm () == ENHANCED) - var_clear_short_name (v); + var_clear_short_names (v); if ( d->callbacks && d->callbacks->var_changed ) d->callbacks->var_changed (d, var_get_dict_index (v), d->cb_data); @@ -690,7 +693,7 @@ dict_rename_vars (struct dictionary *d, /* Clear short names. 
*/ if (get_algorithm () == ENHANCED) for (i = 0; i < count; i++) - var_clear_short_name (vars[i]); + var_clear_short_names (vars[i]); pool_destroy (pool); return true; @@ -1282,133 +1285,6 @@ dict_clear_vectors (struct dictionary *d) d->vector_cnt = 0; } -/* Compares two strings. */ -static int -compare_strings (const void *a, const void *b, const void *aux UNUSED) -{ - return strcmp (a, b); -} - -/* Hashes a string. */ -static unsigned -hash_string (const void *s, const void *aux UNUSED) -{ - return hsh_hash_string (s); -} - - -/* Sets V's short name to BASE, followed by a suffix of the form - _A, _B, _C, ..., _AA, _AB, etc. according to the value of - SUFFIX_NUMBER. Truncates BASE as necessary to fit. */ -static void -set_var_short_name_suffix (struct variable *v, const char *base, - int suffix_number) -{ - char suffix[SHORT_NAME_LEN + 1]; - char short_name[SHORT_NAME_LEN + 1]; - char *start, *end; - int len, ofs; - - assert (v != NULL); - assert (suffix_number >= 0); - - /* Set base name. */ - var_set_short_name (v, base); - - /* Compose suffix. */ - start = end = suffix + sizeof suffix - 1; - *end = '\0'; - do - { - *--start = "ABCDEFGHIJKLMNOPQRSTUVWXYZ"[suffix_number % 26]; - if (start <= suffix + 1) - msg (SE, _("Variable suffix too large.")); - suffix_number /= 26; - } - while (suffix_number > 0); - *--start = '_'; - - /* Append suffix to V's short name. */ - str_copy_trunc (short_name, sizeof short_name, base); - len = end - start; - if (len + strlen (short_name) > SHORT_NAME_LEN) - ofs = SHORT_NAME_LEN - len; - else - ofs = strlen (short_name); - strcpy (short_name + ofs, start); - - /* Set name. */ - var_set_short_name (v, short_name); -} - -/* Assigns a valid, unique short_name[] to each variable in D. - Each variable whose actual name is short has highest priority - for that short name. 
Otherwise, variables with an existing - short_name[] have the next highest priority for a given short - name; if it is already taken, then the variable is treated as - if short_name[] had been empty. Otherwise, long names are - truncated to form short names. If that causes conflicts, - variables are renamed as PREFIX_A, PREFIX_B, and so on. */ -void -dict_assign_short_names (struct dictionary *d) -{ - struct hsh_table *short_names; - size_t i; - - /* Give variables whose names are short the corresponding short - names, and clear short_names[] that conflict with a variable - name. */ - for (i = 0; i < d->var_cnt; i++) - { - struct variable *v = d->var[i]; - const char *short_name = var_get_short_name (v); - if (strlen (var_get_name (v)) <= SHORT_NAME_LEN) - var_set_short_name (v, var_get_name (v)); - else if (short_name != NULL && dict_lookup_var (d, short_name) != NULL) - var_clear_short_name (v); - } - - /* Each variable with an assigned short_name[] now gets it - unless there is a conflict. */ - short_names = hsh_create (d->var_cnt, compare_strings, hash_string, - NULL, NULL); - for (i = 0; i < d->var_cnt; i++) - { - struct variable *v = d->var[i]; - const char *name = var_get_short_name (v); - if (name != NULL && hsh_insert (short_names, (char *) name) != NULL) - var_clear_short_name (v); - } - - /* Now assign short names to remaining variables. */ - for (i = 0; i < d->var_cnt; i++) - { - struct variable *v = d->var[i]; - const char *name = var_get_short_name (v); - if (name == NULL) - { - /* Form initial short_name from the variable name, then - try _A, _B, ... _AA, _AB, etc., if needed.*/ - int trial = 0; - do - { - if (trial == 0) - var_set_short_name (v, var_get_name (v)); - else - set_var_short_name_suffix (v, var_get_name (v), trial - 1); - - trial++; - } - while (hsh_insert (short_names, (char *) var_get_short_name (v)) - != NULL); - } - } - - /* Get rid of hash table. 
*/ - hsh_destroy (short_names); -} - - /* Called from variable.c to notify the dictionary that some property of the variable has changed */ void diff --git a/src/data/dictionary.h b/src/data/dictionary.h index 4887a310..6eb5c724 100644 --- a/src/data/dictionary.h +++ b/src/data/dictionary.h @@ -147,6 +147,4 @@ const struct vector *dict_lookup_vector (const struct dictionary *, const char *name); void dict_clear_vectors (struct dictionary *); -void dict_assign_short_names (struct dictionary *); - #endif /* dictionary.h */ diff --git a/src/data/por-file-writer.c b/src/data/por-file-writer.c index b956ef45..5f91c7e6 100644 --- a/src/data/por-file-writer.c +++ b/src/data/por-file-writer.c @@ -35,6 +35,7 @@ #include #include #include +#include #include #include @@ -311,7 +312,7 @@ write_variables (struct pfm_writer *w, struct dictionary *dict) { int i; - dict_assign_short_names (dict); + short_names_assign (dict); buf_write (w, "4", 1); write_int (w, dict_get_var_cnt (dict)); @@ -324,7 +325,7 @@ write_variables (struct pfm_writer *w, struct dictionary *dict) buf_write (w, "7", 1); write_int (w, var_get_width (v)); - write_string (w, var_get_short_name (v)); + write_string (w, var_get_short_name (v, 0)); write_format (w, var_get_print_format (v)); write_format (w, var_get_write_format (v)); @@ -386,7 +387,7 @@ write_value_labels (struct pfm_writer *w, const struct dictionary *dict) buf_write (w, "D", 1); write_int (w, 1); - write_string (w, var_get_short_name (v)); + write_string (w, var_get_short_name (v, 0)); write_int (w, val_labs_count (val_labs)); for (vl = val_labs_first_sorted (val_labs, &j); vl != NULL; diff --git a/src/data/short-names.c b/src/data/short-names.c new file mode 100644 index 00000000..3ed99c04 --- /dev/null +++ b/src/data/short-names.c @@ -0,0 +1,204 @@ +/* PSPP - a program for statistical analysis. + Copyright (C) 2007 Free Software Foundation, Inc. 
+ + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see . */ + +#include + +#include + +#include +#include +#include +#include +#include +#include +#include +#include + +#include "gettext.h" +#define _(msgid) gettext (msgid) + +/* Compares two strings. */ +static int +compare_strings (const void *a, const void *b, const void *aux UNUSED) +{ + return strcmp (a, b); +} + +/* Hashes a string. */ +static unsigned +hash_string (const void *s, const void *aux UNUSED) +{ + return hsh_hash_string (s); +} + +/* Sets V's short name to BASE, followed by a suffix of the form + _A, _B, _C, ..., _AA, _AB, etc. according to the value of + SUFFIX_NUMBER. Truncates BASE as necessary to fit. */ +static void +set_var_short_name_suffix (struct variable *v, size_t i, + const char *base, int suffix_number) +{ + char suffix[SHORT_NAME_LEN + 1]; + char short_name[SHORT_NAME_LEN + 1]; + char *start, *end; + int len, ofs; + + assert (suffix_number >= 0); + + /* Set base name. */ + var_set_short_name (v, i, base); + + /* Compose suffix. */ + start = end = suffix + sizeof suffix - 1; + *end = '\0'; + do + { + *--start = "ABCDEFGHIJKLMNOPQRSTUVWXYZ"[suffix_number % 26]; + if (start <= suffix + 1) + msg (SE, _("Variable suffix too large.")); + suffix_number /= 26; + } + while (suffix_number > 0); + *--start = '_'; + + /* Append suffix to V's short name. 
*/
+  str_copy_trunc (short_name, sizeof short_name, base);
+  len = end - start;
+  if (len + strlen (short_name) > SHORT_NAME_LEN)
+    ofs = SHORT_NAME_LEN - len;
+  else
+    ofs = strlen (short_name);
+  strcpy (short_name + ofs, start);
+
+  /* Set name. */
+  var_set_short_name (v, i, short_name);
+}
+
+static void
+claim_short_name (struct variable *v, size_t i, struct hsh_table *short_names)
+{
+  const char *short_name = var_get_short_name (v, i);
+  if (short_name != NULL
+      && hsh_insert (short_names, (char *) short_name) != NULL)
+    var_set_short_name (v, i, NULL);
+}
+
+/* Form initial short_name from the variable name, then try _A,
+   _B, ... _AA, _AB, etc., if needed. */
+static void
+assign_short_name (struct variable *v, size_t i, struct hsh_table *short_names)
+{
+  int trial;
+
+  if (var_get_short_name (v, i) != NULL)
+    return;
+
+  for (trial = 0; ; trial++)
+    {
+      if (trial == 0)
+        var_set_short_name (v, i, var_get_name (v));
+      else
+        set_var_short_name_suffix (v, i, var_get_name (v), trial - 1);
+
+      if (hsh_insert (short_names, (char *) var_get_short_name (v, i)) == NULL)
+        break;
+    }
+}
+
+/* Assigns a valid, unique short_name[] to each variable in D.
+   Each variable whose actual name is short has highest priority
+   for that short name.  Otherwise, variables with an existing
+   short_name[] have the next highest priority for a given short
+   name; if it is already taken, then the variable is treated as
+   if short_name[] had been empty.  Otherwise, long names are
+   truncated to form short names.  If that causes conflicts,
+   variables are renamed as PREFIX_A, PREFIX_B, and so on. */
+void
+short_names_assign (struct dictionary *d)
+{
+  size_t var_cnt = dict_get_var_cnt (d);
+  struct hsh_table *short_names;
+  size_t i, j;
+
+  /* Create hash used for detecting conflicts.  The entries in
+     the hash table point to strings owned by dictionary
+     variables, not by us, so we don't need to provide a free
+     function. */
+  short_names = hsh_create (var_cnt, compare_strings, hash_string,
+                            NULL, NULL);
+
+  /* Clear short names that conflict with a variable name. */
+  for (i = 0; i < var_cnt; i++)
+    {
+      struct variable *v = dict_get_var (d, i);
+      int segment_cnt = sfm_width_to_segments (var_get_width (v));
+      for (j = 0; j < segment_cnt; j++)
+        {
+          const char *name = var_get_short_name (v, j);
+          if (name != NULL)
+            {
+              struct variable *ov = dict_lookup_var (d, name);
+              if (ov != NULL && (ov != v || j > 0))
+                var_set_short_name (v, j, NULL);
+            }
+        }
+    }
+
+  /* Give variables whose names are short the corresponding short
+     name. */
+  for (i = 0; i < var_cnt; i++)
+    {
+      struct variable *v = dict_get_var (d, i);
+      if (strlen (var_get_name (v)) <= SHORT_NAME_LEN)
+        var_set_short_name (v, 0, var_get_name (v));
+    }
+
+  /* Each variable with an assigned short name for its first
+     segment now gets it unless there is a conflict.  In case of
+     conflict, the claimant earlier in dictionary order wins.
+     Then similarly for each additional segment J of very long
+     strings. */
+  for (i = 0; i < var_cnt; i++)
+    {
+      struct variable *v = dict_get_var (d, i);
+      claim_short_name (v, 0, short_names);
+    }
+  for (i = 0; i < var_cnt; i++)
+    {
+      struct variable *v = dict_get_var (d, i);
+      int segment_cnt = sfm_width_to_segments (var_get_width (v));
+      for (j = 1; j < segment_cnt; j++)
+        claim_short_name (v, j, short_names);
+    }
+
+  /* Assign short names to first segment of remaining variables,
+     then similarly for additional segments. */
+  for (i = 0; i < var_cnt; i++)
+    {
+      struct variable *v = dict_get_var (d, i);
+      assign_short_name (v, 0, short_names);
+    }
+  for (i = 0; i < var_cnt; i++)
+    {
+      struct variable *v = dict_get_var (d, i);
+      int segment_cnt = sfm_width_to_segments (var_get_width (v));
+      for (j = 1; j < segment_cnt; j++)
+        assign_short_name (v, j, short_names);
+    }
+
+  /* Get rid of hash table. */
+  hsh_destroy (short_names);
+}
diff --git a/src/data/short-names.h b/src/data/short-names.h
new file mode 100644
index 00000000..d5bbb205
--- /dev/null
+++ b/src/data/short-names.h
@@ -0,0 +1,38 @@
+/* PSPP - a program for statistical analysis.
+   Copyright (C) 2007 Free Software Foundation, Inc.
+
+   This program is free software: you can redistribute it and/or modify
+   it under the terms of the GNU General Public License as published by
+   the Free Software Foundation, either version 3 of the License, or
+   (at your option) any later version.
+
+   This program is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+   GNU General Public License for more details.
+
+   You should have received a copy of the GNU General Public License
+   along with this program.  If not, see . */
+
+/* Short names for variables.
+
+   PSPP allows variable names to be up to 64 bytes long, but the
+   system and portable file formats require that each variable
+   have a unique name no more than 8 bytes long, called its
+   "short name".  Furthermore, each "very long" string variable
+   that is more than 255 bytes long has to be divided into
+   multiple long string variables within that limit, and each of
+   these segments must also have its own unique short name.
+
+   The function in this module generates short names for
+   variables with long names or that have very long string
+   width. */
+
+#ifndef DATA_SHORT_NAMES_H
+#define DATA_SHORT_NAMES_H 1
+
+struct dictionary;
+
+void short_names_assign (struct dictionary *);
+
+#endif /* data/short-names.h */
diff --git a/src/data/sys-file-private.c b/src/data/sys-file-private.c
index f544a810..da485f5a 100644
--- a/src/data/sys-file-private.c
+++ b/src/data/sys-file-private.c
@@ -40,4 +40,16 @@ sfm_width_to_bytes (int width)
     }
 }
 
+/* Returns the number of "segments" used for writing case data
+   for a variable of the given WIDTH.
A segment is a physical
+   variable in the system file that represents some piece of a
+   logical variable as seen by a PSPP user.  Only very long
+   string variables have more than one segment. */
+int
+sfm_width_to_segments (int width)
+{
+  assert (width >= 0);
+  return (width < MIN_VERY_LONG_STRING ? 1
+          : DIV_RND_UP (width, EFFECTIVE_LONG_STRING_LENGTH));
+}
 
diff --git a/src/data/sys-file-private.h b/src/data/sys-file-private.h
index a2a85c0d..7170f9f4 100644
--- a/src/data/sys-file-private.h
+++ b/src/data/sys-file-private.h
@@ -23,5 +23,6 @@
 #define EFFECTIVE_LONG_STRING_LENGTH (MIN_VERY_LONG_STRING - 4)
 
 int sfm_width_to_bytes (int width);
+int sfm_width_to_segments (int width);
 
 #endif /* data/sys-file-private.h */
diff --git a/src/data/sys-file-reader.c b/src/data/sys-file-reader.c
index cecd7e3b..9a0c054c 100644
--- a/src/data/sys-file-reader.c
+++ b/src/data/sys-file-reader.c
@@ -279,7 +279,7 @@ sfm_open_reader (struct file_handle *fh, struct dictionary **dict,
              name, but we want to retain it, so re-set it
              explicitly. */
           dict_rename_var (*dict, var, long_name);
-          var_set_short_name (var, short_name);
+          var_set_short_name (var, 0, short_name);
         }
 
       r->has_long_var_names = true;
@@ -508,8 +508,8 @@ read_variable_record (struct sfm_reader *r, struct dictionary *dict,
                _("Duplicate variable name `%s' within system file."),
                name);
 
-  /* Set the short name the same as the long name */
-  var_set_short_name (var, var_get_name (var));
+  /* Set the short name the same as the long name. */
+  var_set_short_name (var, 0, var_get_name (var));
 
   /* Get variable label, if any. */
   if (has_variable_label != 0 && has_variable_label != 1)
@@ -904,8 +904,9 @@ read_long_var_name_map (struct sfm_reader *r, size_t size, size_t count,
   while (read_variable_to_value_map (r, dict, map,
                                      &var, &long_name, &warning_cnt))
     {
-      char short_name[SHORT_NAME_LEN + 1];
-      strcpy (short_name, var_get_short_name (var));
+      char **short_names;
+      size_t short_name_cnt;
+      size_t i;
 
       /* Validate long name. */
       if (!var_is_valid_name (long_name, false))
@@ -917,7 +918,7 @@ read_long_var_name_map (struct sfm_reader *r, size_t size, size_t count,
         }
 
       /* Identify any duplicates. */
-      if (strcasecmp (short_name, long_name)
+      if (strcasecmp (var_get_short_name (var, 0), long_name)
           && dict_lookup_var (dict, long_name) != NULL)
         {
           sys_warn (r, _("Duplicate long variable name `%s' "
@@ -925,11 +926,27 @@ read_long_var_name_map (struct sfm_reader *r, size_t size, size_t count,
           continue;
         }
 
-      /* Set long name.  Renaming a variable may clear the short
-         name, but we want to retain it, so re-set it
-         explicitly. */
+      /* Renaming a variable may clear its short names, but we
+         want to retain them, so we save them and re-set them
+         afterward. */
+      short_name_cnt = var_get_short_name_cnt (var);
+      short_names = xnmalloc (short_name_cnt, sizeof *short_names);
+      for (i = 0; i < short_name_cnt; i++)
+        {
+          const char *s = var_get_short_name (var, i);
+          short_names[i] = s != NULL ? xstrdup (s) : NULL;
+        }
+
+      /* Set long name. */
       dict_rename_var (dict, var, long_name);
-      var_set_short_name (var, short_name);
+
+      /* Restore short names, then release the saved copies. */
+      for (i = 0; i < short_name_cnt; i++)
+        {
+          var_set_short_name (var, i, short_names[i]);
+          free (short_names[i]);
+        }
+      free (short_names);
     }
   close_variable_to_value_map (r, map);
   r->has_long_var_names = true;
@@ -1483,7 +1500,7 @@ lookup_var_by_short_name (struct dictionary *d, const char *short_name)
 
   /* First try looking up by full name.  This often succeeds. */
   var = dict_lookup_var (d, short_name);
-  if (var != NULL && !strcasecmp (var_get_short_name (var), short_name))
+  if (var != NULL && !strcasecmp (var_get_short_name (var, 0), short_name))
     return var;
 
   /* Iterate through the whole dictionary as a fallback.
*/ @@ -1491,7 +1507,7 @@ lookup_var_by_short_name (struct dictionary *d, const char *short_name) for (i = 0; i < var_cnt; i++) { var = dict_get_var (d, i); - if (!strcasecmp (var_get_short_name (var), short_name)) + if (!strcasecmp (var_get_short_name (var, 0), short_name)) return var; } diff --git a/src/data/sys-file-writer.c b/src/data/sys-file-writer.c index c5e20736..a0771ec5 100644 --- a/src/data/sys-file-writer.c +++ b/src/data/sys-file-writer.c @@ -43,6 +43,7 @@ #include #include #include +#include #include #include @@ -195,28 +196,6 @@ sfm_writer_default_options (void) return opts; } - -/* Return a short variable name to be used as the continuation of the - variable with the short name SN. - - FIXME: Need to resolve clashes somehow. - - */ -static const char * -cont_var_name(const char *sn, int idx) -{ - static char s[SHORT_NAME_LEN + 1]; - - char abb[SHORT_NAME_LEN + 1 - 3]= {0}; - - strncpy(abb, sn, SHORT_NAME_LEN - 3); - - snprintf(s, SHORT_NAME_LEN + 1, "%s%03d", abb, idx); - - return s; -} - - /* Opens the system file designated by file handle FH for writing cases from dictionary D according to the given OPTS. If COMPRESS is nonzero, the system file will be compressed. @@ -295,7 +274,7 @@ sfm_open_writer (struct file_handle *fh, struct dictionary *d, write_header (w, d); /* Write basic variable info. 
*/ - dict_assign_short_names (d); + short_names_assign (d); for (i = 0; i < dict_get_var_cnt (d); i++) { int count = 0; @@ -304,15 +283,14 @@ sfm_open_writer (struct file_handle *fh, struct dictionary *d, do { struct variable *var_cont = var_clone (v); - var_set_short_name (var_cont, var_get_short_name (v)); + var_set_short_name (var_cont, 0, var_get_short_name (v, 0)); if ( var_is_alpha (v)) { if ( 0 != count ) { var_clear_missing_values (var_cont); - var_set_short_name (var_cont, - cont_var_name (var_get_short_name (v), - count)); + var_set_short_name (var_cont, 0, + var_get_short_name (v, count)); var_clear_label (var_cont); w->var_cnt_vls++; } @@ -548,7 +526,7 @@ write_variable (struct sfm_writer *w, const struct variable *v) sv.n_missing_values = nm; write_format_spec (var_get_print_format (v), &sv.print); write_format_spec (var_get_write_format (v), &sv.write); - buf_copy_str_rpad (sv.name, sizeof sv.name, var_get_short_name (v)); + buf_copy_str_rpad (sv.name, sizeof sv.name, var_get_short_name (v, 0)); buf_write (w, &sv, sizeof sv); if (label != NULL) @@ -769,7 +747,7 @@ write_vls_length_table (struct sfm_writer *w, continue; ds_put_format (&vls_length_map, "%s=%05d", - var_get_short_name (v), var_get_width (v)); + var_get_short_name (v, 0), var_get_width (v)); ds_put_char (&vls_length_map, '\0'); ds_put_char (&vls_length_map, '\t'); } @@ -808,7 +786,7 @@ write_longvar_table (struct sfm_writer *w, const struct dictionary *dict) if (i) ds_put_char (&long_name_map, '\t'); ds_put_format (&long_name_map, "%s=%s", - var_get_short_name (v), var_get_name (v)); + var_get_short_name (v, 0), var_get_name (v)); } lv_hdr.rec_type = 7; diff --git a/src/data/variable.c b/src/data/variable.c index 22159d87..59f45221 100644 --- a/src/data/variable.c +++ b/src/data/variable.c @@ -19,7 +19,6 @@ #include - #include "category.h" #include "data-out.h" #include "format.h" @@ -63,11 +62,10 @@ struct variable /* Data for use by containing dictionary. 
*/
   struct vardict_info vardict;
 
-  /* Short name, used only for system and portable file input
-     and output.  Upper case only. Short names are not necessarily
-     unique.  Any variable may have no short name, indicated by an
-     empty string. */
-  char short_name[SHORT_NAME_LEN + 1];
+  /* Used only for system and portable file input and output.
+     See short-names.h. */
+  char **short_names;
+  size_t short_name_cnt;
 
   /* Each command may use these fields as needed. */
   void *aux;
@@ -127,7 +125,8 @@ var_create (const char *name, int width)
   v->write = v->print;
   v->val_labs = NULL;
   v->label = NULL;
-  var_clear_short_name (v);
+  v->short_names = NULL;
+  v->short_name_cnt = 0;
   v->aux = NULL;
   v->aux_dtor = NULL;
   v->obs_vals = NULL;
@@ -786,47 +785,81 @@ var_must_leave (const struct variable *v)
   return dict_class_from_id (v->name) == DC_SCRATCH;
 }
 
-/* Returns V's short name, if it has one, or a null pointer
-   otherwise.
+/* Returns the number of short names stored in VAR.
 
    Short names are used only for system and portable file input
    and output.  They are upper-case only, not necessarily
    unique, and limited to SHORT_NAME_LEN characters (plus a null
-   terminator).  Any variable may have no short name, indicated
-   by returning a null pointer. */
+   terminator).  Ordinarily a variable has at most one short
+   name, but very long string variables (longer than 255 bytes)
+   may have more.  A variable might not have any short name at
+   all if it hasn't been saved to or read from a system or
+   portable file. */
+size_t
+var_get_short_name_cnt (const struct variable *var)
+{
+  return var->short_name_cnt;
+}
+
+/* Returns VAR's short name with the given IDX, if it has one
+   with that index, or a null pointer otherwise.  Short names may
+   be sparse: even if IDX is less than the number of short names
+   in VAR, this function may return a null pointer. */
 const char *
-var_get_short_name (const struct variable *v)
+var_get_short_name (const struct variable *var, size_t idx)
 {
-  return v->short_name[0] != '\0' ? v->short_name : NULL;
+  return idx < var->short_name_cnt ? var->short_names[idx] : NULL;
 }
 
-/* Sets V's short_name to SHORT_NAME, truncating it to
-   SHORT_NAME_LEN characters and converting it to uppercase in
-   the process.  Specifying a null pointer for SHORT_NAME clears
-   the variable's short name. */
+/* Sets VAR's short name with the given IDX to SHORT_NAME,
+   truncating it to SHORT_NAME_LEN characters and converting it
+   to uppercase in the process.  Specifying a null pointer for
+   SHORT_NAME clears the specified short name. */
 void
-var_set_short_name (struct variable *v, const char *short_name)
+var_set_short_name (struct variable *var, size_t idx, const char *short_name)
 {
-  assert (v != NULL);
+  assert (var != NULL);
   assert (short_name == NULL || var_is_plausible_name (short_name, false));
 
-  if (short_name != NULL)
+  /* Clear old short name numbered IDX, if any. */
+  if (idx < var->short_name_cnt)
     {
-      str_copy_trunc (v->short_name, sizeof v->short_name, short_name);
-      str_uppercase (v->short_name);
+      free (var->short_names[idx]);
+      var->short_names[idx] = NULL;
     }
-  else
-    v->short_name[0] = '\0';
-  dict_var_changed (v);
+
+  /* Install new short name for IDX. */
+  if (short_name != NULL)
+    {
+      if (idx >= var->short_name_cnt)
+        {
+          size_t old_cnt = var->short_name_cnt;
+          size_t i;
+
+          var->short_name_cnt = MAX (idx * 2, 1);
+          var->short_names = xnrealloc (var->short_names, var->short_name_cnt,
+                                        sizeof *var->short_names);
+          for (i = old_cnt; i < var->short_name_cnt; i++)
+            var->short_names[i] = NULL;
+        }
+      var->short_names[idx] = xstrndup (short_name, SHORT_NAME_LEN);
+      str_uppercase (var->short_names[idx]);
+    }
+
+  dict_var_changed (var);
 }
 
-/* Clears V's short name. */
+/* Clears V's short names and frees the storage that held them. */
 void
-var_clear_short_name (struct variable *v)
-{
-  assert (v != NULL);
-
-  v->short_name[0] = '\0';
+var_clear_short_names (struct variable *v)
+{
+  size_t i;
+
+  for (i = 0; i < v->short_name_cnt; i++)
+    free (v->short_names[i]);
+  free (v->short_names);
+  v->short_names = NULL;
+  v->short_name_cnt = 0;
 }
 
 /* Relationship with dictionary. */
diff --git a/src/data/variable.h b/src/data/variable.h
index 9cc2b844..31b046bc 100644
--- a/src/data/variable.h
+++ b/src/data/variable.h
@@ -143,9 +143,10 @@ void var_set_leave (struct variable *, bool leave);
 bool var_must_leave (const struct variable *);
 
 /* Short names. */
-const char *var_get_short_name (const struct variable *);
-void var_set_short_name (struct variable *, const char *);
-void var_clear_short_name (struct variable *);
+size_t var_get_short_name_cnt (const struct variable *);
+const char *var_get_short_name (const struct variable *, size_t idx);
+void var_set_short_name (struct variable *, size_t, const char *);
+void var_clear_short_names (struct variable *);
 
 /* Relationship with dictionary. */
 size_t var_get_dict_index (const struct variable *);
-- 
2.30.2