From c725a4f64718ef1ee4139c27c94b2eb6447b51b4 Mon Sep 17 00:00:00 2001 From: Ben Pfaff Date: Sat, 2 Feb 2008 06:58:12 +0000 Subject: [PATCH] Patch #6386. Thanks to John Darrington for review and for the updates to gnumeric-reader.c. * dictionary.c (make_hinted_name): New function. (make_numeric_name): New function. (dict_make_unique_var_name): New function. * gnumeric-reader.c (devise_name): Removed. (munge_name): Removed. (gnumeric_open_reader): Use new function dict_make_unique_var_name. * short-names.c (set_var_short_name_suffix): Use new function str_format_26adic. --- src/data/ChangeLog | 17 ++++++ src/data/dictionary.c | 108 ++++++++++++++++++++++++++++++++++ src/data/dictionary.h | 5 +- src/data/gnumeric-reader.c | 83 ++++---------------------- src/data/short-names.c | 23 +++----- src/libpspp/ChangeLog | 6 ++ src/libpspp/str.c | 35 +++++++++++ src/libpspp/str.h | 2 + tests/ChangeLog | 9 +++ tests/automake.mk | 5 ++ tests/command/get-data-gnm.sh | 12 ++-- tests/libpspp/str-test.c | 83 ++++++++++++++++++++++++++ 12 files changed, 293 insertions(+), 95 deletions(-) create mode 100644 tests/libpspp/str-test.c diff --git a/src/data/ChangeLog b/src/data/ChangeLog index 27619661..558bb249 100644 --- a/src/data/ChangeLog +++ b/src/data/ChangeLog @@ -1,3 +1,20 @@ +2008-02-01 Ben Pfaff + + Patch #6386. Thanks to John Darrington for review and for the + updates to gnumeric-reader.c. + + * dictionary.c (make_hinted_name): New function. + (make_numeric_name): New function. + (dict_make_unique_var_name): New function. + + * gnumeric-reader.c (devise_name): Removed. + (munge_name): Removed. + (gnumeric_open_reader): Use new function + dict_make_unique_var_name. + + * short-names.c (set_var_short_name_suffix): Use new function + str_format_26adic. + 2008-01-19 John Darrington * settings.c settings.h: Moved static variables into a diff --git a/src/data/dictionary.c b/src/data/dictionary.c index 85b6a349..0856df93 100644 --- a/src/data/dictionary.c +++ b/src/data/dictionary.c @@ -23,12 +23,14 @@ #include "case.h" #include "category.h" +#include "identifier.h" #include "settings.h" #include "value-labels.h" #include "vardict.h" #include "variable.h" #include "vector.h" #include +#include #include #include #include @@ -36,6 +38,7 @@ #include #include +#include "intprops.h" #include "minmax.h" #include "xalloc.h" @@ -726,6 +729,111 @@ dict_rename_vars (struct dictionary *d, return true; } +static bool +make_hinted_name (const struct dictionary *dict, const char *hint, + char name[VAR_NAME_LEN + 1]) +{ + bool dropped = false; + char *cp; + + for (cp = name; *hint && cp < name + VAR_NAME_LEN; hint++) + { + if (cp == name + ? lex_is_id1 (*hint) && *hint != '$' + : lex_is_idn (*hint)) + { + if (dropped) + { + *cp++ = '_'; + dropped = false; + } + if (cp < name + VAR_NAME_LEN) + *cp++ = *hint; + } + else if (cp > name) + dropped = true; + } + *cp = '\0'; + + if (name[0] != '\0') + { + size_t len = strlen (name); + unsigned long int i; + + if (dict_lookup_var (dict, name) == NULL) + return true; + + for (i = 0; i < ULONG_MAX; i++) + { + char suffix[INT_BUFSIZE_BOUND (i) + 1]; + int ofs; + + suffix[0] = '_'; + if (!str_format_26adic (i + 1, &suffix[1], sizeof suffix - 1)) + NOT_REACHED (); + + ofs = MIN (VAR_NAME_LEN - strlen (suffix), len); + strcpy (&name[ofs], suffix); + + if (dict_lookup_var (dict, name) == NULL) + return true; + } + } + + return false; +} + +static bool +make_numeric_name (const struct dictionary *dict, unsigned long int *num_start, + char name[VAR_NAME_LEN + 1]) +{ + unsigned long int number; + + for (number = num_start != NULL ? MAX (*num_start, 1) : 1; + number < ULONG_MAX; + number++) + { + sprintf (name, "VAR%03lu", number); + if (dict_lookup_var (dict, name) == NULL) + { + if (num_start != NULL) + *num_start = number + 1; + return true; + } + } + + if (num_start != NULL) + *num_start = ULONG_MAX; + return false; +} + + +/* Attempts to devise a variable name unique within DICT. + Returns true if successful, in which case the new variable + name is stored into NAME. Returns false if all names that can + be generated have already been taken. (Returning false is + quite unlikely: at least ULONG_MAX unique names can be + generated.) + + HINT, if it is non-null, is used as a suggestion that will be + modified for suitability as a variable name and for + uniqueness. + + If HINT is null or entirely unsuitable, a name in the form + "VAR%03d" will be generated, where the smallest unused integer + value is used. If NUM_START is non-null, then its value is + used as the minimum numeric value to check, and it is updated + to the next value to be checked. + */ +bool +dict_make_unique_var_name (const struct dictionary *dict, const char *hint, + unsigned long int *num_start, + char name[VAR_NAME_LEN + 1]) +{ + return ((hint != NULL && make_hinted_name (dict, hint, name)) + || make_numeric_name (dict, num_start, name)); +} + /* Returns the weighting variable in dictionary D, or a null pointer if the dictionary is unweighted. */ struct variable * diff --git a/src/data/dictionary.h b/src/data/dictionary.h index 167487fb..0ab259d3 100644 --- a/src/data/dictionary.h +++ b/src/data/dictionary.h @@ -75,11 +75,14 @@ void dict_reorder_var (struct dictionary *, struct variable *, void dict_reorder_vars (struct dictionary *, struct variable *const *, size_t count); -/* Changing variable names. */ +/* Variable names. */ void dict_rename_var (struct dictionary *, struct variable *, const char *); bool dict_rename_vars (struct dictionary *, struct variable **, char **new_names, size_t count, char **err_name); +bool dict_make_unique_var_name (const struct dictionary *, const char *hint, + unsigned long int *num_start, + char name[]); /* Weight variable. */ double dict_get_case_weight (const struct dictionary *, diff --git a/src/data/gnumeric-reader.c b/src/data/gnumeric-reader.c index 418d63c2..52217664 100644 --- a/src/data/gnumeric-reader.c +++ b/src/data/gnumeric-reader.c @@ -308,66 +308,6 @@ process_node (struct gnumeric_reader *r) } - -/* - Change SUGGESTION until it's a valid name that can be added to DICT. -*/ -static void -devise_name (const struct dictionary *dict, struct string *name, int *x) -{ - struct string basename; - if ( ds_is_empty (name)) - ds_init_cstr (&basename, "var"); - else - ds_init_string (&basename, name); - do - { - ds_clear (name); - ds_put_format (name, "%s%d", ds_cstr (&basename), ++(*x)); - } - while (NULL != dict_lookup_var (dict, ds_cstr (name)) ); - - ds_destroy (&basename); -} - -/* - Mutate NAME of a variable, which is gauranteed to be valid for the - dictionary DICT. -*/ -static void -munge_name (const struct dictionary *dict, struct string *name) -{ - int x = 0; - - if (! ds_is_empty (name)) - { - /* Change all the invalid characters to valid ones */ - char *s; - - s = ds_data (name); - - if ( !lex_is_id1 (*s)) - *s = '@'; - - s++; - - while (s < ds_data (name) + ds_length (name)) - { - if ( !lex_is_idn (*s)) - *s = '_'; - s++; - } - - assert (var_is_valid_name (ds_cstr (name), false)); - } - - while (ds_is_empty (name) || NULL != dict_lookup_var (dict, ds_cstr (name)) ) - { - devise_name (dict, name, &x); - } -} - - /* Sets the VAR of case C, to the value corresponding to the xml string XV */ @@ -410,6 +350,7 @@ struct var_spec struct casereader * gnumeric_open_reader (struct gnumeric_read_info *gri, struct dictionary **dict) { + unsigned long int vstart = 0; int ret; casenumber n_cases = CASENUMBER_MAX; int i; @@ -567,25 +508,23 @@ gnumeric_open_reader (struct gnumeric_read_info *gri, struct dictionary **dict) for (i = 0 ; i < n_var_specs ; ++i ) { - struct string name; + char name[VAR_NAME_LEN + 1]; - /* Probably no data exists for this variable, so allocate a default width */ + /* Probably no data exists for this variable, so allocate a + default width */ if ( var_spec[i].width == -1 ) var_spec[i].width = MAX_SHORT_STRING; r->value_cnt += value_cnt_from_width (var_spec[i].width); - if (var_spec[i].name) - ds_init_cstr (&name, var_spec[i].name); - else - ds_init_empty (&name); - - munge_name (r->dict, &name); - - - dict_create_var (r->dict, ds_cstr (&name), var_spec[i].width); + if ( ! dict_make_unique_var_name (r->dict, var_spec[i].name, + &vstart, name)) + { + msg (ME, _("Cannot create variable name from %s"), var_spec[i].name); + goto error; + } - ds_destroy (&name); + dict_create_var (r->dict, name, var_spec[i].width); } /* Create the first case, and cache it */ diff --git a/src/data/short-names.c b/src/data/short-names.c index 6a997f53..08bf7587 100644 --- a/src/data/short-names.c +++ b/src/data/short-names.c @@ -53,7 +53,6 @@ set_var_short_name_suffix (struct variable *v, size_t i, { char suffix[SHORT_NAME_LEN + 1]; char short_name[SHORT_NAME_LEN + 1]; - char *start, *end; int len, ofs; assert (suffix_number >= 0); @@ -62,26 +61,18 @@ set_var_short_name_suffix (struct variable *v, size_t i, var_set_short_name (v, i, base); /* Compose suffix. */ - start = end = suffix + sizeof suffix - 1; - *end = '\0'; - do - { - *--start = "ABCDEFGHIJKLMNOPQRSTUVWXYZ"[suffix_number % 26]; - if (start <= suffix + 1) - msg (SE, _("Variable suffix too large.")); - suffix_number /= 26; - } - while (suffix_number > 0); - *--start = '_'; + suffix[0] = '_'; + if (!str_format_26adic (suffix_number, &suffix[1], sizeof suffix - 1)) + msg (SE, _("Variable suffix too large.")); + len = strlen (suffix); /* Append suffix to V's short name. */ str_copy_trunc (short_name, sizeof short_name, base); - len = end - start; - if (len + strlen (short_name) > SHORT_NAME_LEN) + if (strlen (short_name) + len > SHORT_NAME_LEN) ofs = SHORT_NAME_LEN - len; else ofs = strlen (short_name); - strcpy (short_name + ofs, start); + strcpy (short_name + ofs, suffix); /* Set name. */ var_set_short_name (v, i, short_name); @@ -111,7 +102,7 @@ assign_short_name (struct variable *v, size_t i, struct hsh_table *short_names) if (trial == 0) var_set_short_name (v, i, var_get_name (v)); else - set_var_short_name_suffix (v, i, var_get_name (v), trial - 1); + set_var_short_name_suffix (v, i, var_get_name (v), trial); if (hsh_insert (short_names, (char *) var_get_short_name (v, i)) == NULL) break; diff --git a/src/libpspp/ChangeLog b/src/libpspp/ChangeLog index 502b9292..8547567e 100644 --- a/src/libpspp/ChangeLog +++ b/src/libpspp/ChangeLog @@ -1,3 +1,9 @@ +2008-02-01 Ben Pfaff + + Patch #6386. Thanks to John Darrington for review. + + * str.c (str_format_26adic): New function. + 2007-12-24 John Darrington * taint.c (taint_destroy): Return true if pointer is null. diff --git a/src/libpspp/str.c b/src/libpspp/str.c index 225c559d..b169f0d7 100644 --- a/src/libpspp/str.c +++ b/src/libpspp/str.c @@ -252,6 +252,41 @@ str_lowercase (char *s) *s = tolower ((unsigned char) *s); } +/* Converts NUMBER into a string in 26-adic notation in BUFFER, + which has room for SIZE bytes. Returns true if successful, + false if NUMBER, plus a trailing null, is too large to fit in + the available space. + + 26-adic notation is "spreadsheet column numbering": 1 = A, 2 = + B, 3 = C, ... 26 = Z, 27 = AA, 28 = AB, 29 = AC, ... + + 26-adic notation is the special case of a k-adic numeration + system (aka bijective base-k numeration) with k=26. In k-adic + numeration, the digits are {1, 2, 3, ..., k} (there is no + digit 0), and integer 0 is represented by the empty string. + For more information, see + http://en.wikipedia.org/wiki/Bijective_numeration. */ +bool +str_format_26adic (unsigned long int number, char buffer[], size_t size) +{ + size_t length = 0; + + while (number-- > 0) + { + if (length >= size) + return false; + buffer[length++] = "ABCDEFGHIJKLMNOPQRSTUVWXYZ"[number % 26]; + number /= 26; + } + + if (length >= size) + return false; + buffer[length] = '\0'; + + buf_reverse (buffer, length); + return true; +} + /* Formats FORMAT into DST, as with sprintf(), and returns the address of the terminating null written to DST. */ char * diff --git a/src/libpspp/str.h b/src/libpspp/str.h index 8357732c..50671d90 100644 --- a/src/libpspp/str.h +++ b/src/libpspp/str.h @@ -46,6 +46,8 @@ void str_copy_buf_trunc (char *, size_t, const char *, size_t); void str_uppercase (char *); void str_lowercase (char *); +bool str_format_26adic (unsigned long int number, char buffer[], size_t); + char *spprintf (char *dst, const char *format, ...); void *mempset (void *, int, size_t); diff --git a/tests/ChangeLog b/tests/ChangeLog index efc4f8da..6511612f 100644 --- a/tests/ChangeLog +++ b/tests/ChangeLog @@ -1,3 +1,12 @@ +2008-02-01 Ben Pfaff + + * automake.mk: Add new test. + + * libpspp/str-test.c: New test. + + * command/get-dat-gnm.sh: Update variable names to match new + naming scheme. + 2007-12-04 Ben Pfaff * automake.mk: Add new tests. diff --git a/tests/automake.mk b/tests/automake.mk index be303c1f..b8ab5d63 100644 --- a/tests/automake.mk +++ b/tests/automake.mk @@ -163,6 +163,7 @@ nodist_TESTS = \ tests/libpspp/range-map-test \ tests/libpspp/range-set-test \ tests/libpspp/sparse-array-test \ + tests/libpspp/str-test \ tests/libpspp/tower-test TESTS = $(dist_TESTS) $(nodist_TESTS) @@ -228,6 +229,10 @@ tests_libpspp_range_set_test_SOURCES = \ tests_libpspp_range_set_test_LDADD = gl/libgl.la @LIBINTL@ tests_libpspp_range_set_test_CPPFLAGS = $(AM_CPPFLAGS) -DASSERT_LEVEL=10 +tests_libpspp_str_test_SOURCES = \ + tests/libpspp/str-test.c +tests_libpspp_str_test_LDADD = src/libpspp/libpspp.a gl/libgl.la @LIBINTL@ + tests_libpspp_tower_test_SOURCES = \ src/libpspp/abt.c \ src/libpspp/abt.h \ diff --git a/tests/command/get-data-gnm.sh b/tests/command/get-data-gnm.sh index 2cad373f..70cb24d1 100755 --- a/tests/command/get-data-gnm.sh +++ b/tests/command/get-data-gnm.sh @@ -96,23 +96,23 @@ diff $TEMPDIR/pspp.list - <. */ + +#include + +#include + +#include +#include +#include + +/* Currently running test. */ +static const char *test_name; + +/* Exit with a failure code. + (Place a breakpoint on this function while debugging.) */ +static void +check_die (void) +{ + exit (EXIT_FAILURE); +} + +static void +check_26adic (unsigned long int number, const char *expected_string) +{ + char string[8]; + str_format_26adic (number, string, sizeof string); + if (strcmp (string, expected_string)) + { + printf ("base-26 of %lu: expected \"%s\", got \"%s\"\n", + number, expected_string, string); + check_die (); + } +} + +static void +test_str_format_26adic (void) +{ + check_26adic (0, ""); + check_26adic (1, "A"); + check_26adic (2, "B"); + check_26adic (26, "Z"); + check_26adic (27, "AA"); + check_26adic (28, "AB"); + check_26adic (29, "AC"); + check_26adic (18278, "ZZZ"); + check_26adic (18279, "AAAA"); + check_26adic (19010, "ABCD"); +} + +/* Main program. */ + +/* Runs TEST_FUNCTION and prints a message about NAME. */ +static void +run_test (void (*test_function) (void), const char *name) +{ + test_name = name; + putchar ('.'); + fflush (stdout); + test_function (); +} + +int +main (void) +{ + run_test (test_str_format_26adic, "format 26-adic strings"); + putchar ('\n'); + + return 0; +} -- 2.30.2